blob: 7d46529f93046d8efc5dc819812d178178aa0ceb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "util/spinlock.h"
#include "common/status.h"
namespace impala {
namespace io {
class DiskIoMgr;
class DiskQueue;
class RequestContext;
class ScanRange;
/// Abstract class that provides an interface for file operations.
/// Child classes implement these operations for the local file system
/// and for HDFS.
/// A FileReader object is owned by a single ScanRange object, and
/// a ScanRange object only has a single FileReader object.
class FileReader {
 public:
  /// Creates a reader bound to 'scan_range'. The pointer is only stored; this base
  /// class never deletes it. Marked 'explicit' so that a ScanRange* is never
  /// implicitly converted in a context that expects a FileReader.
  explicit FileReader(ScanRange* scan_range) : scan_range_(scan_range) {}

  /// Virtual so that concrete readers can be destroyed through a FileReader pointer.
  virtual ~FileReader() = default;

  /// Opens the file that is associated with 'scan_range_'.
  /// 'use_file_handle_cache' is currently only used by HdfsFileReader.
  virtual Status Open(bool use_file_handle_cache) = 0;

  /// Reads bytes starting at the given position ('file_offset'). Tries to read
  /// 'bytes_to_read' bytes. On return, 'bytes_read' contains the number of bytes
  /// actually read and 'eof' is set to true when the end of the file has been reached.
  /// 'buffer' is presumably the destination for the data and is expected to hold at
  /// least 'bytes_to_read' bytes -- NOTE(review): confirm against the implementations.
  /// Metrics in 'queue' are updated with the size and latencies of the read
  /// operations on the underlying file system.
  virtual Status ReadFromPos(DiskQueue* queue, int64_t file_offset, uint8_t* buffer,
      int64_t bytes_to_read, int64_t* bytes_read, bool* eof) = 0;

  /// ***Currently only for HDFS***
  /// When successful, sets 'data' to a buffer that contains the contents of a file,
  /// and sets 'length' to the length of the data.
  /// When unsuccessful, 'data' is set to nullptr.
  virtual void CachedFile(uint8_t** data, int64_t* length) = 0;

  /// Closes the file associated with 'scan_range_'. It has no effect on other
  /// scan ranges.
  virtual void Close() = 0;

  /// Resets internal bookkeeping. The base implementation does nothing.
  virtual void ResetState() {}

  /// Debug string of this file reader. The base implementation returns an empty
  /// string.
  virtual std::string DebugString() const { return ""; }

  /// Accessor for 'lock_'; see the comment on that member for when it must be held
  /// and how it is ordered relative to scan_range_->lock_.
  SpinLock& lock() { return lock_; }

 protected:
  /// Lock that should be taken during fs calls. Only one thread (the disk reading
  /// thread) calls into fs at a time so this lock does not have performance impact.
  /// This lock only serves to coordinate cleanup. Specifically it serves to ensure
  /// that the disk threads are finished with FS calls before scan_range_->is_cancelled_
  /// is set to true and cleanup starts.
  /// If this lock and scan_range_->lock_ need to be taken, scan_range_->lock_ must be
  /// taken first.
  SpinLock lock_;

  /// The scan range this file reader serves. The pointer itself is const, so it
  /// cannot be re-pointed after construction.
  ScanRange* const scan_range_;
};
}
}