core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java - accumulo - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.accumulo.core.client;

 import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.TimeUnit;

 import org.apache.accumulo.core.client.IteratorSetting.Column;
 import org.apache.accumulo.core.client.sample.SamplerConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.spi.scan.HintScanPrioritizer;
 import org.apache.accumulo.core.spi.scan.ScanDispatcher;
 import org.apache.accumulo.core.spi.scan.ScanInfo;
 import org.apache.accumulo.core.spi.scan.ScanPrioritizer;
 import org.apache.accumulo.core.spi.scan.SimpleScanDispatcher;
 import org.apache.hadoop.io.Text;

 /**
  * This interface hosts configuration methods that are shared between different types of
  * scanners. A scanner is iterable over {@code Entry<Key,Value>} pairs and is closeable.
  */
 public interface ScannerBase extends Iterable<Entry<Key,Value>>, AutoCloseable {

   /**
    * Add a server-side scan iterator.
    *
    * @param cfg
    *          fully specified scan-time iterator, including all options for the iterator. Any
    *          changes to the iterator setting after this call are not propagated to the stored
    *          iterator.
    * @throws IllegalArgumentException
    *           if the setting conflicts with existing iterators
    */
   void addScanIterator(IteratorSetting cfg);

   /**
    * Remove an iterator from the list of iterators.
    *
    * @param iteratorName
    *          nickname used for the iterator
    */
   void removeScanIterator(String iteratorName);

   /**
    * Update the options for an iterator. Note that this does <b>not</b> change the iterator options
    * during a scan, it just replaces the given option on a configured iterator before a scan is
    * started.
    *
    * @param iteratorName
    *          the name of the iterator to change
    * @param key
    *          the name of the option
    * @param value
    *          the new value for the named option
    */
   void updateScanIteratorOption(String iteratorName, String key, String value);

   /**
    * Adds a column family to the list of columns that will be fetched by this scanner. By default
    * when no columns have been added the scanner fetches all columns. To fetch multiple column
    * families call this function multiple times.
    *
    * <p>
    * This can help limit which locality groups are read on the server side.
    *
    * <p>
    * When used in conjunction with custom iterators, the set of column families fetched is passed to
    * the top iterator's seek method. Custom iterators may change this set of column families when
    * calling seek on their source.
    *
    * @param col
    *          the column family to be fetched
    */
   void fetchColumnFamily(Text col);

   /**
    * Adds a column to the list of columns that will be fetched by this scanner. The column is
    * identified by family and qualifier. By default when no columns have been added the scanner
    * fetches all columns.
    *
    * <p>
    * <b>WARNING</b>. Using this method with custom iterators may have unexpected results. Iterators
    * have control over which column families are fetched. However iterators have no control over
    * which column qualifiers are fetched. When this method is called it activates a system iterator
    * that only allows the requested family/qualifier pairs through. This low level filtering
    * prevents custom iterators from requesting additional column families when calling seek.
    *
    * <p>
    * For an example, assume fetchColumn(A, Q1) and fetchColumn(B, Q1) are called on a scanner and a
    * custom iterator is configured. The families (A,B) will be passed to the seek method of the
    * custom iterator. If the custom iterator seeks its source iterator using the families (A,B,C),
    * it will never see any data from C because the system iterator filtering A:Q1 and B:Q1 will
    * prevent the C family from getting through. ACCUMULO-3905 also has an example of the type of
    * problem this method can cause.
    *
    * <p>
    * In short: if using a custom iterator with a seek method that adds column families, you may
    * want to avoid using this method.
    *
    * @param colFam
    *          the column family of the column to be fetched
    * @param colQual
    *          the column qualifier of the column to be fetched
    */
   void fetchColumn(Text colFam, Text colQual);

   /**
    * Adds a column to the list of columns that will be fetched by this scanner.
    *
    * @param column
    *          the {@link Column} to fetch
    * @since 1.7.0
    */
   void fetchColumn(Column column);

   /**
    * Clears the columns to be fetched (useful for resetting the scanner for reuse). Once cleared,
    * the scanner will fetch all columns.
    */
   void clearColumns();

   /**
    * Clears scan iterators prior to returning a scanner to the pool.
    */
   void clearScanIterators();

   /**
    * Returns an iterator over an accumulo table. This iterator uses the options that are currently
    * set for its lifetime, so setting options will have no effect on existing iterators.
    *
    * <p>
    * Keys returned by the iterator are not guaranteed to be in sorted order.
    *
    * @return an iterator over Key,Value pairs which meet the restrictions set on the scanner
    */
   @Override
   Iterator<Entry<Key,Value>> iterator();

   /**
    * This setting determines how long a scanner will automatically retry when a failure occurs. By
    * default, a scanner will retry forever.
    *
    * <p>
    * Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with
    * {@link TimeUnit#MILLISECONDS}) means no timeout.
    *
    * @param timeOut
    *          the length of the timeout
    * @param timeUnit
    *          the units of the timeout
    * @since 1.5.0
    */
   void setTimeout(long timeOut, TimeUnit timeUnit);

   /**
    * Returns the setting for how long a scanner will automatically retry when a failure occurs.
    *
    * @param timeUnit
    *          presumably the units of the returned timeout, mirroring
    *          {@link #getBatchTimeout(TimeUnit)}; confirm against an implementation
    * @return the timeout configured for this scanner
    * @since 1.5.0
    */
   long getTimeout(TimeUnit timeUnit);

   /**
    * Closes any underlying connections on the scanner. This may invalidate any iterators derived
    * from the Scanner, causing them to throw exceptions.
    *
    * @since 1.5.0
    */
   @Override
   void close();

   /**
    * Returns the authorizations that have been set on the scanner.
    *
    * @since 1.7.0
    * @return The authorizations set on the scanner instance
    */
   Authorizations getAuthorizations();

   /**
    * Setting this will cause the scanner to read sample data, as long as that sample data was
    * generated with the given configuration. By default this is not set and all data is read.
    *
    * <p>
    * One way to use this method is as follows, where the sampler configuration is obtained from the
    * table configuration. Sample data can be generated in many different ways, so it's important to
    * verify the sample data configuration meets expectations.
    *
    * <pre>
    * <code>
    *   // could cache this if creating many scanners to avoid RPCs.
    *   SamplerConfiguration samplerConfig =
    *     client.tableOperations().getSamplerConfiguration(table);
    *   // verify table's sample data is generated in an expected way before using
    *   userCode.verifySamplerConfig(samplerConfig);
    *   scanner.setSamplerConfiguration(samplerConfig);
    * </code>
    * </pre>
    *
    * <p>
    * Of course this is not the only way to obtain a {@link SamplerConfiguration}, it could be a
    * constant, configuration, etc.
    *
    * <p>
    * If sample data is not present or sample data was generated with a different configuration, then
    * the scanner iterator will throw a {@link SampleNotPresentException}. Also if a table's sampler
    * configuration is changed while a scanner is iterating over a table, a
    * {@link SampleNotPresentException} may be thrown.
    *
    * @param samplerConfig
    *          the configuration the sample data must have been generated with
    * @since 1.8.0
    */
   void setSamplerConfiguration(SamplerConfiguration samplerConfig);

   /**
    * Returns the sampler configuration currently set on this scanner.
    *
    * @return currently set sampler configuration. Returns null if no sampler configuration is set.
    * @since 1.8.0
    */
   SamplerConfiguration getSamplerConfiguration();

   /**
    * Clears sampler configuration making a scanner read all data. After calling this,
    * {@link #getSamplerConfiguration()} should return null.
    *
    * @since 1.8.0
    */
   void clearSamplerConfiguration();

   /**
    * This setting determines how long a scanner will wait to fill the returned batch. By default, a
    * scanner waits until the batch is full.
    *
    * <p>
    * Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with
    * {@link TimeUnit#MILLISECONDS}) means no timeout.
    *
    * @param timeOut
    *          the length of the timeout
    * @param timeUnit
    *          the units of the timeout
    * @since 1.8.0
    */
   void setBatchTimeout(long timeOut, TimeUnit timeUnit);

   /**
    * Returns the timeout to fill a batch in the given TimeUnit.
    *
    * @param timeUnit
    *          the units in which the batch timeout is returned
    * @return the batch timeout configured for this scanner
    * @since 1.8.0
    */
   long getBatchTimeout(TimeUnit timeUnit);

   /**
    * Sets the name of the classloader context on this scanner. See the administration chapter of the
    * user manual for details on how to configure and use classloader contexts.
    *
    * @param classLoaderContext
    *          name of the classloader context
    * @throws NullPointerException
    *           if classLoaderContext is null
    * @since 1.8.0
    */
   void setClassLoaderContext(String classLoaderContext);

   /**
    * Clears the current classloader context set on this scanner.
    *
    * @since 1.8.0
    */
   void clearClassLoaderContext();

   /**
    * Returns the name of the current classloader context set on this scanner.
    *
    * @return name of the current context
    * @since 1.8.0
    */
   String getClassLoaderContext();

   /**
    * Set hints for the configured {@link ScanPrioritizer} and {@link ScanDispatcher}. These hints
    * are available on the server side via {@link ScanInfo#getExecutionHints()}. Depending on the
    * configuration, these hints may be ignored. Hints will never impact what data is returned by a
    * scan, only how quickly it is returned.
    *
    * <p>
    * The default configuration for Accumulo will ignore hints. See {@link HintScanPrioritizer} and
    * {@link SimpleScanDispatcher} for examples of classes that can react to hints.
    *
    * @param hints
    *          the execution hints to set on this scanner
    * @since 2.0.0
    */
   void setExecutionHints(Map<String,String> hints);
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.accumulo.core.client;

	import java.util.Iterator;
	import java.util.Map;
	import java.util.Map.Entry;
	import java.util.concurrent.TimeUnit;

	import org.apache.accumulo.core.client.IteratorSetting.Column;
	import org.apache.accumulo.core.client.sample.SamplerConfiguration;
	import org.apache.accumulo.core.data.Key;
	import org.apache.accumulo.core.data.Value;
	import org.apache.accumulo.core.security.Authorizations;
	import org.apache.accumulo.core.spi.scan.HintScanPrioritizer;
	import org.apache.accumulo.core.spi.scan.ScanDispatcher;
	import org.apache.accumulo.core.spi.scan.ScanInfo;
	import org.apache.accumulo.core.spi.scan.ScanPrioritizer;
	import org.apache.accumulo.core.spi.scan.SimpleScanDispatcher;
	import org.apache.hadoop.io.Text;

	/**
	* This interface hosts configuration methods that are shared between different types of
	* scanners. A scanner is iterable over {@code Entry<Key,Value>} pairs and is closeable.
	*/
	public interface ScannerBase extends Iterable<Entry<Key,Value>>, AutoCloseable {

	/**
	* Add a server-side scan iterator.
	*
	* @param cfg
	* fully specified scan-time iterator, including all options for the iterator. Any
	* changes to the iterator setting after this call are not propagated to the stored
	* iterator.
	* @throws IllegalArgumentException
	* if the setting conflicts with existing iterators
	*/
	void addScanIterator(IteratorSetting cfg);

	/**
	* Remove an iterator from the list of iterators.
	*
	* @param iteratorName
	* nickname used for the iterator
	*/
	void removeScanIterator(String iteratorName);

	/**
	* Update the options for an iterator. Note that this does <b>not</b> change the iterator options
	* during a scan, it just replaces the given option on a configured iterator before a scan is
	* started.
	*
	* @param iteratorName
	* the name of the iterator to change
	* @param key
	* the name of the option
	* @param value
	* the new value for the named option
	*/
	void updateScanIteratorOption(String iteratorName, String key, String value);

	/**
	* Adds a column family to the list of columns that will be fetched by this scanner. By default
	* when no columns have been added the scanner fetches all columns. To fetch multiple column
	* families call this function multiple times.
	*
	* <p>
	* This can help limit which locality groups are read on the server side.
	*
	* <p>
	* When used in conjunction with custom iterators, the set of column families fetched is passed to
	* the top iterator's seek method. Custom iterators may change this set of column families when
	* calling seek on their source.
	*
	* @param col
	* the column family to be fetched
	*/
	void fetchColumnFamily(Text col);

	/**
	* Adds a column to the list of columns that will be fetched by this scanner. The column is
	* identified by family and qualifier. By default when no columns have been added the scanner
	* fetches all columns.
	*
	* <p>
	* <b>WARNING</b>. Using this method with custom iterators may have unexpected results. Iterators
	* have control over which column families are fetched. However iterators have no control over
	* which column qualifiers are fetched. When this method is called it activates a system iterator
	* that only allows the requested family/qualifier pairs through. This low level filtering
	* prevents custom iterators from requesting additional column families when calling seek.
	*
	* <p>
	* For an example, assume fetchColumn(A, Q1) and fetchColumn(B, Q1) are called on a scanner and a
	* custom iterator is configured. The families (A,B) will be passed to the seek method of the
	* custom iterator. If the custom iterator seeks its source iterator using the families (A,B,C),
	* it will never see any data from C because the system iterator filtering A:Q1 and B:Q1 will
	* prevent the C family from getting through. ACCUMULO-3905 also has an example of the type of
	* problem this method can cause.
	*
	* <p>
	* In short: if using a custom iterator with a seek method that adds column families, you may
	* want to avoid using this method.
	*
	* @param colFam
	* the column family of the column to be fetched
	* @param colQual
	* the column qualifier of the column to be fetched
	*/
	void fetchColumn(Text colFam, Text colQual);

	/**
	* Adds a column to the list of columns that will be fetched by this scanner.
	*
	* @param column
	* the {@link Column} to fetch
	* @since 1.7.0
	*/
	void fetchColumn(Column column);

	/**
	* Clears the columns to be fetched (useful for resetting the scanner for reuse). Once cleared,
	* the scanner will fetch all columns.
	*/
	void clearColumns();

	/**
	* Clears scan iterators prior to returning a scanner to the pool.
	*/
	void clearScanIterators();

	/**
	* Returns an iterator over an accumulo table. This iterator uses the options that are currently
	* set for its lifetime, so setting options will have no effect on existing iterators.
	*
	* <p>
	* Keys returned by the iterator are not guaranteed to be in sorted order.
	*
	* @return an iterator over Key,Value pairs which meet the restrictions set on the scanner
	*/
	@Override
	Iterator<Entry<Key,Value>> iterator();

	/**
	* This setting determines how long a scanner will automatically retry when a failure occurs. By
	* default, a scanner will retry forever.
	*
	* <p>
	* Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with
	* {@link TimeUnit#MILLISECONDS}) means no timeout.
	*
	* @param timeOut
	* the length of the timeout
	* @param timeUnit
	* the units of the timeout
	* @since 1.5.0
	*/
	void setTimeout(long timeOut, TimeUnit timeUnit);

	/**
	* Returns the setting for how long a scanner will automatically retry when a failure occurs.
	*
	* @param timeUnit
	* presumably the units of the returned timeout, mirroring
	* {@link #getBatchTimeout(TimeUnit)}; confirm against an implementation
	* @return the timeout configured for this scanner
	* @since 1.5.0
	*/
	long getTimeout(TimeUnit timeUnit);

	/**
	* Closes any underlying connections on the scanner. This may invalidate any iterators derived
	* from the Scanner, causing them to throw exceptions.
	*
	* @since 1.5.0
	*/
	@Override
	void close();

	/**
	* Returns the authorizations that have been set on the scanner.
	*
	* @since 1.7.0
	* @return The authorizations set on the scanner instance
	*/
	Authorizations getAuthorizations();

	/**
	* Setting this will cause the scanner to read sample data, as long as that sample data was
	* generated with the given configuration. By default this is not set and all data is read.
	*
	* <p>
	* One way to use this method is as follows, where the sampler configuration is obtained from the
	* table configuration. Sample data can be generated in many different ways, so it's important to
	* verify the sample data configuration meets expectations.
	*
	* <pre>
	* <code>
	*   // could cache this if creating many scanners to avoid RPCs.
	*   SamplerConfiguration samplerConfig =
	*     client.tableOperations().getSamplerConfiguration(table);
	*   // verify table's sample data is generated in an expected way before using
	*   userCode.verifySamplerConfig(samplerConfig);
	*   scanner.setSamplerConfiguration(samplerConfig);
	* </code>
	* </pre>
	*
	* <p>
	* Of course this is not the only way to obtain a {@link SamplerConfiguration}, it could be a
	* constant, configuration, etc.
	*
	* <p>
	* If sample data is not present or sample data was generated with a different configuration, then
	* the scanner iterator will throw a {@link SampleNotPresentException}. Also if a table's sampler
	* configuration is changed while a scanner is iterating over a table, a
	* {@link SampleNotPresentException} may be thrown.
	*
	* @param samplerConfig
	* the configuration the sample data must have been generated with
	* @since 1.8.0
	*/
	void setSamplerConfiguration(SamplerConfiguration samplerConfig);

	/**
	* Returns the sampler configuration currently set on this scanner.
	*
	* @return currently set sampler configuration. Returns null if no sampler configuration is set.
	* @since 1.8.0
	*/
	SamplerConfiguration getSamplerConfiguration();

	/**
	* Clears sampler configuration making a scanner read all data. After calling this,
	* {@link #getSamplerConfiguration()} should return null.
	*
	* @since 1.8.0
	*/
	void clearSamplerConfiguration();

	/**
	* This setting determines how long a scanner will wait to fill the returned batch. By default, a
	* scanner waits until the batch is full.
	*
	* <p>
	* Setting the timeout to zero (with any time unit) or {@link Long#MAX_VALUE} (with
	* {@link TimeUnit#MILLISECONDS}) means no timeout.
	*
	* @param timeOut
	* the length of the timeout
	* @param timeUnit
	* the units of the timeout
	* @since 1.8.0
	*/
	void setBatchTimeout(long timeOut, TimeUnit timeUnit);

	/**
	* Returns the timeout to fill a batch in the given TimeUnit.
	*
	* @param timeUnit
	* the units in which the batch timeout is returned
	* @return the batch timeout configured for this scanner
	* @since 1.8.0
	*/
	long getBatchTimeout(TimeUnit timeUnit);

	/**
	* Sets the name of the classloader context on this scanner. See the administration chapter of the
	* user manual for details on how to configure and use classloader contexts.
	*
	* @param classLoaderContext
	* name of the classloader context
	* @throws NullPointerException
	* if classLoaderContext is null
	* @since 1.8.0
	*/
	void setClassLoaderContext(String classLoaderContext);

	/**
	* Clears the current classloader context set on this scanner.
	*
	* @since 1.8.0
	*/
	void clearClassLoaderContext();

	/**
	* Returns the name of the current classloader context set on this scanner.
	*
	* @return name of the current context
	* @since 1.8.0
	*/
	String getClassLoaderContext();

	/**
	* Set hints for the configured {@link ScanPrioritizer} and {@link ScanDispatcher}. These hints
	* are available on the server side via {@link ScanInfo#getExecutionHints()}. Depending on the
	* configuration, these hints may be ignored. Hints will never impact what data is returned by a
	* scan, only how quickly it is returned.
	*
	* <p>
	* The default configuration for Accumulo will ignore hints. See {@link HintScanPrioritizer} and
	* {@link SimpleScanDispatcher} for examples of classes that can react to hints.
	*
	* @param hints
	* the execution hints to set on this scanner
	* @since 2.0.0
	*/
	void setExecutionHints(Map<String,String> hints);
	}