blob: 1f481e18ff7012ab0b7cb7a2dd0551cb4def6c1b [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_INSTREAM
#define C_LUCY_FILEWINDOW
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/FileHandle.h"
#include "Lucy/Store/FSFileHandle.h"
#include "Lucy/Store/FileWindow.h"
#include "Lucy/Store/RAMFile.h"
#include "Lucy/Store/RAMFileHandle.h"
// Inlined version of InStream_Tell.
static INLINE int64_t
SI_tell(InStream *self);
// Inlined version of InStream_Read_Bytes.
static INLINE void
SI_read_bytes(InStream *self, char* buf, size_t len);
// Inlined version of InStream_Read_U8.
static INLINE uint8_t
SI_read_u8(InStream *self);
// Ensure that the buffer contains exactly the specified amount of data.
static void
S_fill(InStream *self, int64_t amount);
// Refill the buffer, with either IO_STREAM_BUF_SIZE bytes or all remaining
// file content -- whichever is smaller. Throw an error if we're at EOF and
// can't load at least one byte.
static int64_t
S_refill(InStream *self);
InStream*
InStream_open(Obj *file) {
InStream *self = (InStream*)VTable_Make_Obj(INSTREAM);
return InStream_do_open(self, file);
}
InStream*
InStream_do_open(InStream *self, Obj *file) {
// Init.
self->buf = NULL;
self->limit = NULL;
self->offset = 0;
self->window = FileWindow_new();
// Obtain a FileHandle.
if (Obj_Is_A(file, FILEHANDLE)) {
self->file_handle = (FileHandle*)INCREF(file);
}
else if (Obj_Is_A(file, RAMFILE)) {
self->file_handle
= (FileHandle*)RAMFH_open(NULL, FH_READ_ONLY, (RAMFile*)file);
}
else if (Obj_Is_A(file, CHARBUF)) {
self->file_handle
= (FileHandle*)FSFH_open((CharBuf*)file, FH_READ_ONLY);
}
else {
Err_set_error(Err_new(CB_newf("Invalid type for param 'file': '%o'",
Obj_Get_Class_Name(file))));
DECREF(self);
return NULL;
}
if (!self->file_handle) {
ERR_ADD_FRAME(Err_get_error());
DECREF(self);
return NULL;
}
// Get length and filename from the FileHandle.
self->filename = CB_Clone(FH_Get_Path(self->file_handle));
self->len = FH_Length(self->file_handle);
if (self->len == -1) {
ERR_ADD_FRAME(Err_get_error());
DECREF(self);
return NULL;
}
return self;
}
void
InStream_close(InStream *self) {
if (self->file_handle) {
FH_Release_Window(self->file_handle, self->window);
// Note that we don't close the FileHandle, because it's probably
// shared.
DECREF(self->file_handle);
self->file_handle = NULL;
}
}
void
InStream_destroy(InStream *self) {
if (self->file_handle) {
InStream_Close(self);
}
DECREF(self->filename);
DECREF(self->window);
SUPER_DESTROY(self, INSTREAM);
}
InStream*
InStream_reopen(InStream *self, const CharBuf *filename, int64_t offset,
int64_t len) {
if (!self->file_handle) {
THROW(ERR, "Can't Reopen() closed InStream %o", self->filename);
}
if (offset + len > FH_Length(self->file_handle)) {
THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
offset, len, FH_Length(self->file_handle));
}
InStream *twin = (InStream*)VTable_Make_Obj(self->vtable);
InStream_do_open(twin, (Obj*)self->file_handle);
if (filename != NULL) { CB_Mimic(twin->filename, (Obj*)filename); }
twin->offset = offset;
twin->len = len;
InStream_Seek(twin, 0);
return twin;
}
InStream*
InStream_clone(InStream *self) {
InStream *twin = (InStream*)VTable_Make_Obj(self->vtable);
InStream_do_open(twin, (Obj*)self->file_handle);
InStream_Seek(twin, SI_tell(self));
return twin;
}
CharBuf*
InStream_get_filename(InStream *self) {
return self->filename;
}
static int64_t
S_refill(InStream *self) {
// Determine the amount to request.
const int64_t sub_file_pos = SI_tell(self);
const int64_t remaining = self->len - sub_file_pos;
const int64_t amount = remaining < IO_STREAM_BUF_SIZE
? remaining
: IO_STREAM_BUF_SIZE;
if (!remaining) {
THROW(ERR, "Read past EOF of '%o' (offset: %i64 len: %i64)",
self->filename, self->offset, self->len);
}
// Make the request.
S_fill(self, amount);
return amount;
}
void
InStream_refill(InStream *self) {
S_refill(self);
}
static void
S_fill(InStream *self, int64_t amount) {
FileWindow *const window = self->window;
const int64_t virtual_file_pos = SI_tell(self);
const int64_t real_file_pos = virtual_file_pos + self->offset;
const int64_t remaining = self->len - virtual_file_pos;
// Throw an error if the requested amount would take us beyond EOF.
if (amount > remaining) {
THROW(ERR, "Read past EOF of %o (pos: %u64 len: %u64 request: %u64)",
self->filename, virtual_file_pos, self->len, amount);
}
// Make the request.
if (FH_Window(self->file_handle, window, real_file_pos, amount)) {
char *const window_limit = window->buf + window->len;
self->buf = window->buf
- window->offset // theoretical start of real file
+ self->offset // top of virtual file
+ virtual_file_pos; // position within virtual file
self->limit = window_limit - self->buf > remaining
? self->buf + remaining
: window_limit;
}
else {
Err *error = Err_get_error();
CB_catf(Err_Get_Mess(error), " (%o)", self->filename);
RETHROW(INCREF(error));
}
}
void
InStream_fill(InStream *self, int64_t amount) {
S_fill(self, amount);
}
void
InStream_seek(InStream *self, int64_t target) {
FileWindow *const window = self->window;
int64_t virtual_window_top = window->offset - self->offset;
int64_t virtual_window_end = virtual_window_top + window->len;
if (target < 0) {
THROW(ERR, "Can't Seek '%o' to negative target %i64", self->filename,
target);
}
// Seek within window if possible.
else if (target >= virtual_window_top
&& target <= virtual_window_end
) {
self->buf = window->buf - window->offset + self->offset + target;
}
else if (target > self->len) {
THROW(ERR, "Can't Seek '%o' past EOF (%i64 > %i64)", self->filename,
target, self->len);
}
else {
// Target is outside window. Set all buffer and limit variables to
// NULL to trigger refill on the next read. Store the file position
// in the FileWindow's offset.
FH_Release_Window(self->file_handle, window);
self->buf = NULL;
self->limit = NULL;
FileWindow_Set_Offset(window, self->offset + target);
}
}
static INLINE int64_t
SI_tell(InStream *self) {
FileWindow *const window = self->window;
int64_t pos_in_buf = PTR_TO_I64(self->buf) - PTR_TO_I64(window->buf);
return pos_in_buf + window->offset - self->offset;
}
int64_t
InStream_tell(InStream *self) {
return SI_tell(self);
}
int64_t
InStream_length(InStream *self) {
return self->len;
}
char*
InStream_buf(InStream *self, size_t request) {
const int64_t bytes_in_buf = PTR_TO_I64(self->limit) - PTR_TO_I64(self->buf);
/* It's common for client code to overestimate how much is needed, because
* the request has to figure in worst-case for compressed data. However,
* if we can still serve them everything they request (e.g. they ask for 5
* bytes, they really need 1 byte, and there's 1k in the buffer), we can
* skip the following refill block. */
if ((int64_t)request > bytes_in_buf) {
const int64_t remaining_in_file = self->len - SI_tell(self);
int64_t amount = request;
// Try to bump up small requests.
if (amount < IO_STREAM_BUF_SIZE) { amount = IO_STREAM_BUF_SIZE; }
// Don't read past EOF.
if (remaining_in_file < amount) { amount = remaining_in_file; }
// Only fill if the recalculated, possibly smaller request exceeds the
// amount available in the buffer.
if (amount > bytes_in_buf) {
S_fill(self, amount);
}
}
return self->buf;
}
void
InStream_advance_buf(InStream *self, char *buf) {
if (buf > self->limit) {
int64_t overrun = PTR_TO_I64(buf) - PTR_TO_I64(self->limit);
THROW(ERR, "Supplied value is %i64 bytes beyond end of buffer",
overrun);
}
else if (buf < self->buf) {
int64_t underrun = PTR_TO_I64(self->buf) - PTR_TO_I64(buf);
THROW(ERR, "Can't Advance_Buf backwards: (underrun: %i64))", underrun);
}
else {
self->buf = buf;
}
}
void
InStream_read_bytes(InStream *self, char* buf, size_t len) {
SI_read_bytes(self, buf, len);
}
static INLINE void
SI_read_bytes(InStream *self, char* buf, size_t len) {
const int64_t available = PTR_TO_I64(self->limit) - PTR_TO_I64(self->buf);
if (available >= (int64_t)len) {
// Request is entirely within buffer, so copy.
memcpy(buf, self->buf, len);
self->buf += len;
}
else {
// Pass along whatever we've got in the buffer.
if (available > 0) {
memcpy(buf, self->buf, (size_t)available);
buf += available;
len -= (size_t)available;
self->buf += available;
}
if (len < IO_STREAM_BUF_SIZE) {
// Ensure that we have enough mapped, then copy the rest.
int64_t got = S_refill(self);
if (got < (int64_t)len) {
int64_t orig_pos = SI_tell(self) - available;
int64_t orig_len = len + available;
THROW(ERR, "Read past EOF of %o (pos: %i64 len: %i64 "
"request: %i64)", self->filename, orig_pos,
self->len, orig_len);
}
memcpy(buf, self->buf, len);
self->buf += len;
}
else {
// Too big to handle via the buffer, so resort to a brute-force
// read.
const int64_t sub_file_pos = SI_tell(self);
const int64_t real_file_pos = sub_file_pos + self->offset;
bool_t success
= FH_Read(self->file_handle, buf, real_file_pos, len);
if (!success) {
RETHROW(INCREF(Err_get_error()));
}
InStream_seek(self, sub_file_pos + len);
}
}
}
int8_t
InStream_read_i8(InStream *self) {
return (int8_t)SI_read_u8(self);
}
static INLINE uint8_t
SI_read_u8(InStream *self) {
if (self->buf >= self->limit) { S_refill(self); }
return (uint8_t)(*self->buf++);
}
uint8_t
InStream_read_u8(InStream *self) {
return SI_read_u8(self);
}
static INLINE uint32_t
SI_read_u32(InStream *self) {
uint32_t retval;
SI_read_bytes(self, (char*)&retval, 4);
#ifdef LITTLE_END
retval = NumUtil_decode_bigend_u32((char*)&retval);
#endif
return retval;
}
uint32_t
InStream_read_u32(InStream *self) {
return SI_read_u32(self);
}
int32_t
InStream_read_i32(InStream *self) {
return (int32_t)SI_read_u32(self);
}
static INLINE uint64_t
SI_read_u64(InStream *self) {
uint64_t retval;
SI_read_bytes(self, (char*)&retval, 8);
#ifdef LITTLE_END
retval = NumUtil_decode_bigend_u64((char*)&retval);
#endif
return retval;
}
uint64_t
InStream_read_u64(InStream *self) {
return SI_read_u64(self);
}
int64_t
InStream_read_i64(InStream *self) {
return (int64_t)SI_read_u64(self);
}
float
InStream_read_f32(InStream *self) {
union { float f; uint32_t u32; } duo;
SI_read_bytes(self, (char*)&duo, sizeof(float));
#ifdef LITTLE_END
duo.u32 = NumUtil_decode_bigend_u32(&duo.u32);
#endif
return duo.f;
}
double
InStream_read_f64(InStream *self) {
union { double d; uint64_t u64; } duo;
SI_read_bytes(self, (char*)&duo, sizeof(double));
#ifdef LITTLE_END
duo.u64 = NumUtil_decode_bigend_u64(&duo.u64);
#endif
return duo.d;
}
uint32_t
InStream_read_c32(InStream *self) {
uint32_t retval = 0;
while (1) {
const uint8_t ubyte = SI_read_u8(self);
retval = (retval << 7) | (ubyte & 0x7f);
if ((ubyte & 0x80) == 0) {
break;
}
}
return retval;
}
uint64_t
InStream_read_c64(InStream *self) {
uint64_t retval = 0;
while (1) {
const uint8_t ubyte = SI_read_u8(self);
retval = (retval << 7) | (ubyte & 0x7f);
if ((ubyte & 0x80) == 0) {
break;
}
}
return retval;
}
int
InStream_read_raw_c64(InStream *self, char *buf) {
uint8_t *dest = (uint8_t*)buf;
do {
*dest = SI_read_u8(self);
} while ((*dest++ & 0x80) != 0);
return dest - (uint8_t*)buf;
}