blob: 273a79e2c41d0dcd31a5805513f356860f1da576 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_INSTREAM
#include "Lucy/Util/ToolSet.h"
#include "charmony.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/FileHandle.h"
#include "Lucy/Store/FSFileHandle.h"
#include "Lucy/Store/FileWindow.h"
#include "Lucy/Store/RAMFile.h"
#include "Lucy/Store/RAMFileHandle.h"
#include "Lucy/Util/NumberUtils.h"
// Inlined version of InStream_Tell: report the current position relative to
// the top of the (virtual) file.
static CFISH_INLINE int64_t
SI_tell(InStream *self);
// Inlined version of InStream_Read_Bytes: copy `len` bytes from the stream
// into `buf`, advancing the stream position.
static CFISH_INLINE void
SI_read_bytes(InStream *self, char* buf, int64_t len);
// Inlined version of InStream_Read_U8. The caller supplies the stream's
// ivars alongside `self`.
static CFISH_INLINE uint8_t
SI_read_u8(InStream *self, InStreamIVARS *const ivars);
// Ask the FileHandle for a window of `amount` bytes starting at the current
// position and re-point the buffer at it. Throws if `amount` exceeds the
// number of bytes remaining in the file.
static void
S_fill(InStream *self, int64_t amount);
// Refill the buffer, with either IO_STREAM_BUF_SIZE bytes or all remaining
// file content -- whichever is smaller. Throw an error if we're at EOF and
// can't load at least one byte. Returns the number of bytes requested.
static int64_t
S_refill(InStream *self);
// Inlined decoder shared by InStream_Read_CI32 and InStream_Read_CU32.
static CFISH_INLINE uint32_t
SI_read_cu32(InStream *self);
// Inlined decoder shared by InStream_Read_CI64 and InStream_Read_CU64.
static CFISH_INLINE uint64_t
SI_read_cu64(InStream *self);
// Allocate a new InStream of class INSTREAM and initialize it against `file`
// via InStream_do_open().  The result of InStream_do_open() is returned
// unchanged, so this yields NULL when initialization fails.
InStream*
InStream_open(Obj *file) {
    InStream *const fresh  = (InStream*)Class_Make_Obj(INSTREAM);
    InStream *const opened = InStream_do_open(fresh, file);
    return opened;
}
// Initialize `self` so that it reads from `file`, which may be a FileHandle,
// a RAMFile, or a String holding a filesystem path.
//
// On success, returns `self`.  On failure, an error is stored via the global
// Err mechanism, `self` is released with DECREF, and NULL is returned -- the
// caller must not touch `self` afterwards.
InStream*
InStream_do_open(InStream *self, Obj *file) {
    InStreamIVARS *const ivars = InStream_IVARS(self);

    // Begin with an empty buffer at the top of the file and a fresh window.
    ivars->buf    = NULL;
    ivars->limit  = NULL;
    ivars->offset = 0;
    ivars->window = FileWindow_new();

    // Turn `file` into a FileHandle according to its runtime class.
    if (Obj_is_a(file, FILEHANDLE)) {
        // Already a FileHandle: take an additional reference to it.
        ivars->file_handle = (FileHandle*)INCREF(file);
    }
    else if (Obj_is_a(file, RAMFILE)) {
        RAMFile *const ram_file = (RAMFile*)file;
        ivars->file_handle
            = (FileHandle*)RAMFH_open(NULL, FH_READ_ONLY, ram_file);
    }
    else if (Obj_is_a(file, STRING)) {
        String *const path = (String*)file;
        ivars->file_handle = (FileHandle*)FSFH_open(path, FH_READ_ONLY);
    }
    else {
        String *const mess = Str_newf("Invalid type for param 'file': '%o'",
                                      Obj_get_class_name(file));
        Err_set_error(Err_new(mess));
        DECREF(self);
        return NULL;
    }

    // Opening the handle failed; the opener has already recorded the error.
    if (ivars->file_handle == NULL) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Take the filename and length from the FileHandle.
    FileHandle *const handle = ivars->file_handle;
    ivars->filename = Str_Clone(FH_Get_Path(handle));
    ivars->len      = FH_Length(handle);
    if (ivars->len == -1) {
        // A length of -1 is treated as failure.
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    return self;
}
// Release the stream's window and drop its reference to the FileHandle.
// Does nothing if the stream has no FileHandle (e.g. it was already closed).
void
InStream_Close_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    FileHandle *const handle = ivars->file_handle;
    if (handle == NULL) {
        return;
    }
    FH_Release_Window(handle, ivars->window);
    // The FileHandle itself is not closed here, because it's probably
    // shared; we only give up our reference.
    DECREF(handle);
    ivars->file_handle = NULL;
}
// Destructor: close the stream if it still holds a FileHandle, release the
// filename and the window, then chain to the superclass destructor.
void
InStream_Destroy_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);

    // A non-NULL file_handle means Close() has not run yet.
    if (ivars->file_handle != NULL) {
        InStream_Close(self);
    }

    DECREF(ivars->filename);
    DECREF(ivars->window);
    SUPER_DESTROY(self, INSTREAM);
}
// Create a new InStream of the same class as `self`, backed by the same
// FileHandle, which treats the `len` bytes starting at `offset` as its file.
//
// filename: if non-NULL, a clone of it replaces the filename taken from the
//           FileHandle's path.
// offset:   start of the new stream's content within the FileHandle.
// len:      length the new stream reports and enforces.
//
// Returns the new stream, positioned at 0.  Throws if `self` has been
// closed, if offset + len exceeds the FileHandle's length, or if the new
// stream cannot be initialized.
InStream*
InStream_Reopen_IMP(InStream *self, String *filename, int64_t offset,
                    int64_t len) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    if (!ivars->file_handle) {
        THROW(ERR, "Can't Reopen() closed InStream %o", ivars->filename);
    }
    if (offset + len > FH_Length(ivars->file_handle)) {
        THROW(ERR, "Offset + length too large (%i64 + %i64 > %i64)",
              offset, len, FH_Length(ivars->file_handle));
    }

    Class *klass = InStream_get_class(self);
    InStream *other = (InStream*)Class_Make_Obj(klass);
    if (!InStream_do_open(other, (Obj*)ivars->file_handle)) {
        // InStream_do_open() has already released `other` on failure, so it
        // must not be dereferenced; propagate the stored error instead.
        RETHROW(INCREF(Err_get_error()));
    }

    InStreamIVARS *const ovars = InStream_IVARS(other);
    if (filename != NULL) {
        // Swap in the caller-supplied name, releasing the one do_open set.
        String *temp = ovars->filename;
        ovars->filename = Str_Clone(filename);
        DECREF(temp);
    }
    ovars->offset = offset;
    ovars->len    = len;
    // Reposition relative to the new offset.
    InStream_Seek(other, 0);
    return other;
}
// Create a new InStream of the same class as `self`, backed by the same
// FileHandle, and seek it to `self`'s current position.
//
// NOTE(review): the twin is initialized by InStream_do_open(), so its offset
// is 0 and its length and filename come from the FileHandle rather than
// being copied from `self` -- confirm this is intended for streams produced
// by Reopen().
//
// Throws if the twin cannot be initialized.
InStream*
InStream_Clone_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    Class *klass = InStream_get_class(self);
    InStream *twin = (InStream*)Class_Make_Obj(klass);
    if (!InStream_do_open(twin, (Obj*)ivars->file_handle)) {
        // InStream_do_open() has already released `twin` on failure, so it
        // must not be used; propagate the stored error instead.
        RETHROW(INCREF(Err_get_error()));
    }
    InStream_Seek(twin, SI_tell(self));
    return twin;
}
// Accessor for the stream's filename.  The returned String is the one stored
// in the stream's ivars; no new reference is taken here.
String*
InStream_Get_Filename_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    return ivars->filename;
}
// Refill the buffer from the current position with IO_STREAM_BUF_SIZE bytes
// or all remaining file content, whichever is smaller.
//
// Returns the number of bytes requested from S_fill().  Throws if no bytes
// remain, i.e. the current position is at or beyond the stream's length.
static int64_t
S_refill(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);

    // Determine how much content is left after the current position.
    const int64_t sub_file_pos = SI_tell(self);
    const int64_t remaining    = ivars->len - sub_file_pos;

    // Seek() accepts any target inside the current window before comparing
    // it against len, so the position can end up past EOF and `remaining`
    // can be negative.  Treat that exactly like EOF rather than passing a
    // negative amount down to S_fill().
    if (remaining <= 0) {
        THROW(ERR, "Read past EOF of '%o' (offset: %i64 len: %i64)",
              ivars->filename, ivars->offset, ivars->len);
    }

    const int64_t amount = remaining < IO_STREAM_BUF_SIZE
                           ? remaining
                           : IO_STREAM_BUF_SIZE;

    // Make the request.
    S_fill(self, amount);
    return amount;
}
// Public wrapper around S_refill(); the byte count it returns is discarded.
void
InStream_Refill_IMP(InStream *self) {
    (void)S_refill(self);
}
// Ask the FileHandle to window `amount` bytes of the file starting at the
// stream's current position, then recompute `buf` and `limit` from the
// resulting FileWindow.
//
// Throws if `amount` is larger than the number of bytes between the current
// position and the stream's length.  If the FileHandle cannot supply the
// window, the filename is appended to the stored error, which is rethrown.
static void
S_fill(InStream *self, int64_t amount) {
InStreamIVARS *const ivars = InStream_IVARS(self);
FileWindow *const window = ivars->window;
// Position relative to the top of this stream, and the matching position in
// the underlying file (shifted by the stream's offset).
const int64_t virtual_file_pos = SI_tell(self);
const int64_t real_file_pos = virtual_file_pos + ivars->offset;
const int64_t remaining = ivars->len - virtual_file_pos;
// Throw an error if the requested amount would take us beyond EOF.
if (amount > remaining) {
THROW(ERR, "Read past EOF of %o (pos: %i64 len: %i64 request: %i64)",
ivars->filename, virtual_file_pos, ivars->len, amount);
}
// Make the request.
if (FH_Window(ivars->file_handle, window, real_file_pos, amount)) {
char *fw_buf = FileWindow_Get_Buf(window);
int64_t fw_offset = FileWindow_Get_Offset(window);
int64_t fw_len = FileWindow_Get_Len(window);
char *const window_limit = fw_buf + fw_len;
// Translate the current file position into an address inside the window.
ivars->buf = fw_buf
- fw_offset // theoretical start of real file
+ ivars->offset // top of virtual file
+ virtual_file_pos; // position within virtual file
// Never let the readable region extend past the end of this stream, even
// when the window holds more bytes than remain.
ivars->limit = window_limit - ivars->buf > remaining
? ivars->buf + remaining
: window_limit;
}
else {
// Windowing failed: append the filename to the stored error for context
// and rethrow it.
Err *error = Err_get_error();
String *str = Str_newf(" (%o)", ivars->filename);
Err_Cat_Mess(error, str);
DECREF(str);
RETHROW(INCREF(error));
}
}
// Public wrapper around S_fill(): window `amount` bytes starting at the
// current position.  Any error thrown by S_fill() propagates to the caller.
void
InStream_Fill_IMP(InStream *self, int64_t amount) {
    const int64_t wanted = amount;
    S_fill(self, wanted);
}
// Move the stream to `target`, a position relative to the top of this
// stream.
//
// If `target` falls inside the current window, only the buffer pointer is
// moved.  Otherwise the window is released and the position is stored in the
// FileWindow's offset so that the next read triggers a refill.
//
// Throws if `target` is negative, or if it lies outside the current window
// and beyond the stream's length.
void
InStream_Seek_IMP(InStream *self, int64_t target) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    FileWindow *const win = ivars->window;

    // Describe the current window in this stream's own coordinates.
    char *const   win_buf    = FileWindow_Get_Buf(win);
    const int64_t win_offset = FileWindow_Get_Offset(win);
    const int64_t win_len    = FileWindow_Get_Len(win);
    const int64_t win_first  = win_offset - ivars->offset;
    const int64_t win_last   = win_first + win_len;
    const bool    in_window  = target >= win_first && target <= win_last;

    if (target < 0) {
        THROW(ERR, "Can't Seek '%o' to negative target %i64", ivars->filename,
              target);
    }
    else if (in_window) {
        // Seek within window if possible.
        ivars->buf = win_buf - win_offset + ivars->offset + target;
    }
    else if (target > ivars->len) {
        THROW(ERR, "Can't Seek '%o' past EOF (%i64 > %i64)", ivars->filename,
              target, ivars->len);
    }
    else {
        // Target is outside window.  Null out the buffer and limit so that
        // the next read refills, and remember the file position in the
        // FileWindow's offset.
        FH_Release_Window(ivars->file_handle, win);
        ivars->buf   = NULL;
        ivars->limit = NULL;
        FileWindow_Set_Offset(win, ivars->offset + target);
    }
}
// Compute the current position relative to the top of this stream: the
// distance of `buf` from the start of the window's buffer, plus the window's
// offset, minus the stream's own offset.
static CFISH_INLINE int64_t
SI_tell(InStream *self) {
    InStreamIVARS *const ivars  = InStream_IVARS(self);
    FileWindow    *const window = ivars->window;
    const int64_t buf_addr      = CHY_PTR_TO_I64(ivars->buf);
    const int64_t window_addr   = CHY_PTR_TO_I64(FileWindow_Get_Buf(window));
    const int64_t real_pos
        = (buf_addr - window_addr) + FileWindow_Get_Offset(window);
    return real_pos - ivars->offset;
}
// Report the current position relative to the top of this stream; delegates
// to the inlined SI_tell().
int64_t
InStream_Tell_IMP(InStream *self) {
    const int64_t position = SI_tell(self);
    return position;
}
// Accessor for the stream's length as stored in its ivars.
int64_t
InStream_Length_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    return ivars->len;
}
// Return the stream's buffer pointer at the current position, first trying
// to make `request` bytes available.
//
// Callers commonly overestimate `request` (it has to cover the worst case
// for compressed data), so the request is trimmed to what the file still
// holds instead of being treated as an error; fewer than `request` bytes may
// therefore be readable through the returned pointer.
const char*
InStream_Buf_IMP(InStream *self, size_t request) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    const int64_t wanted   = (int64_t)request;
    const int64_t buffered
        = CHY_PTR_TO_I64(ivars->limit) - CHY_PTR_TO_I64(ivars->buf);

    // Fast path: the buffer already holds everything that was asked for.
    if (wanted <= buffered) {
        return ivars->buf;
    }

    const int64_t left_in_file = ivars->len - SI_tell(self);

    // Try to bump up small requests.
    int64_t amount = wanted;
    if (amount < IO_STREAM_BUF_SIZE) {
        amount = IO_STREAM_BUF_SIZE;
    }

    // Don't read past EOF.
    if (left_in_file < amount) {
        amount = left_in_file;
    }

    // Only fill if the recalculated, possibly smaller request still exceeds
    // what the buffer has on hand.
    if (amount > buffered) {
        S_fill(self, amount);
    }

    return ivars->buf;
}
// Move the stream's buffer pointer forward to `buf`.
//
// Throws if `buf` lies beyond the buffer's limit or before the current
// buffer position; otherwise `buf` becomes the new current position.
void
InStream_Advance_Buf_IMP(InStream *self, const char *buf) {
    InStreamIVARS *const ivars = InStream_IVARS(self);

    if (buf > ivars->limit) {
        // Report how far past the end of the buffer the caller went.
        const int64_t past_end
            = CHY_PTR_TO_I64(buf) - CHY_PTR_TO_I64(ivars->limit);
        THROW(ERR, "Supplied value is %i64 bytes beyond end of buffer",
              past_end);
    }
    else if (buf < ivars->buf) {
        // Report how far behind the current position the caller went.
        const int64_t behind
            = CHY_PTR_TO_I64(ivars->buf) - CHY_PTR_TO_I64(buf);
        THROW(ERR, "Can't Advance_Buf backwards: (underrun: %i64))", behind);
    }
    else {
        ivars->buf = buf;
    }
}
// Read `len` bytes from the stream into `buf`.
//
// The inlined reader works with a signed 64-bit length, so on platforms where
// size_t can exceed INT64_MAX, oversized requests are rejected up front.
void
InStream_Read_Bytes_IMP(InStream *self, char* buf, size_t len) {
#if SIZE_MAX > INT64_MAX
    if (len >= INT64_MAX) {
        THROW(ERR, "Can't read %u64 bytes", (uint64_t)len);
    }
#endif
    const int64_t signed_len = (int64_t)len;
    SI_read_bytes(self, buf, signed_len);
}
// Copy `len` bytes from the stream into `buf` and advance the stream by the
// same amount.
//
// Three paths are taken depending on how the request compares with what is
// buffered:
//   1. The buffer already holds `len` bytes: copy straight out of it.
//   2. The leftover (after draining the buffer) is smaller than
//      IO_STREAM_BUF_SIZE: refill the buffer and copy from it.
//   3. Otherwise: read directly from the FileHandle into `buf`, then Seek
//      past the bytes read.
//
// Throws if the request runs past EOF, and rethrows the stored error if the
// direct FileHandle read fails.
static CFISH_INLINE void
SI_read_bytes(InStream *self, char* buf, int64_t len) {
InStreamIVARS *const ivars = InStream_IVARS(self);
// Number of bytes between the current position and the buffer's limit.
const int64_t available
= CHY_PTR_TO_I64(ivars->limit) - CHY_PTR_TO_I64(ivars->buf);
if (available >= len) {
// Request is entirely within buffer, so copy.
memcpy(buf, ivars->buf, (size_t)len);
ivars->buf += len;
}
else {
// Pass along whatever we've got in the buffer.
if (available > 0) {
memcpy(buf, ivars->buf, (size_t)available);
buf += available;
len -= available;
ivars->buf += available;
}
if (len < IO_STREAM_BUF_SIZE) {
// Ensure that we have enough mapped, then copy the rest.
int64_t got = S_refill(self);
if (got < len) {
// Reconstruct the caller's original position and request size (before the
// partial copy above) for the error message.
int64_t orig_pos = SI_tell(self) - available;
int64_t orig_len = len + available;
THROW(ERR, "Read past EOF of %o (pos: %i64 len: %i64 "
"request: %i64)", ivars->filename, orig_pos,
ivars->len, orig_len);
}
memcpy(buf, ivars->buf, (size_t)len);
ivars->buf += len;
}
else {
// Too big to handle via the buffer, so resort to a brute-force
// read.
const int64_t sub_file_pos = SI_tell(self);
const int64_t real_file_pos = sub_file_pos + ivars->offset;
bool success
= FH_Read(ivars->file_handle, buf, real_file_pos, (size_t)len);
if (!success) {
RETHROW(INCREF(Err_get_error()));
}
// The direct read bypassed the buffer, so reposition the stream just past
// the bytes that were read.
InStream_Seek_IMP(self, sub_file_pos + len);
}
}
}
// Read one byte from the stream and return it reinterpreted as signed.
int8_t
InStream_Read_I8_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    const uint8_t raw = SI_read_u8(self, ivars);
    return (int8_t)raw;
}
// Return the byte at the current position and advance by one, refilling the
// buffer first if it has been exhausted.  S_refill() throws at EOF.
static CFISH_INLINE uint8_t
SI_read_u8(InStream *self, InStreamIVARS *ivars) {
    if (ivars->buf >= ivars->limit) {
        S_refill(self);
    }
    const uint8_t byte = (uint8_t)(*ivars->buf);
    ivars->buf++;
    return byte;
}
// Read one unsigned byte from the stream; delegates to the inlined
// SI_read_u8().
uint8_t
InStream_Read_U8_IMP(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    const uint8_t byte = SI_read_u8(self, ivars);
    return byte;
}
// Read 4 bytes from the stream and return them as a uint32_t.  On
// little-endian builds the bytes are decoded as big-endian; otherwise they
// are used as read.
static CFISH_INLINE uint32_t
SI_read_u32(InStream *self) {
    uint32_t value;
    char *const raw = (char*)&value;
    SI_read_bytes(self, raw, 4);
#ifdef CHY_LITTLE_END
    value = NumUtil_decode_bigend_u32(raw);
#endif
    return value;
}
// Read a 4-byte unsigned integer; delegates to the inlined SI_read_u32().
uint32_t
InStream_Read_U32_IMP(InStream *self) {
    const uint32_t value = SI_read_u32(self);
    return value;
}
// Read a 4-byte integer via SI_read_u32() and return it cast to signed.
int32_t
InStream_Read_I32_IMP(InStream *self) {
    const uint32_t raw = SI_read_u32(self);
    return (int32_t)raw;
}
// Read 8 bytes from the stream and return them as a uint64_t.  On
// little-endian builds the bytes are decoded as big-endian; otherwise they
// are used as read.
static CFISH_INLINE uint64_t
SI_read_u64(InStream *self) {
    uint64_t value;
    char *const raw = (char*)&value;
    SI_read_bytes(self, raw, 8);
#ifdef CHY_LITTLE_END
    value = NumUtil_decode_bigend_u64(raw);
#endif
    return value;
}
// Read an 8-byte unsigned integer; delegates to the inlined SI_read_u64().
uint64_t
InStream_Read_U64_IMP(InStream *self) {
    const uint64_t value = SI_read_u64(self);
    return value;
}
// Read an 8-byte integer via SI_read_u64() and return it cast to signed.
int64_t
InStream_Read_I64_IMP(InStream *self) {
    const uint64_t raw = SI_read_u64(self);
    return (int64_t)raw;
}
// Read sizeof(float) bytes from the stream and return them as a float.  A
// union is used to view the same storage as either a float or a uint32_t; on
// little-endian builds the integer view is decoded as big-endian first.
float
InStream_Read_F32_IMP(InStream *self) {
    union { float f; uint32_t u32; } bits;
    char *const raw = (char*)&bits;
    SI_read_bytes(self, raw, sizeof(float));
#ifdef CHY_LITTLE_END
    bits.u32 = NumUtil_decode_bigend_u32(&bits.u32);
#endif
    return bits.f;
}
// Read sizeof(double) bytes from the stream and return them as a double.  A
// union is used to view the same storage as either a double or a uint64_t;
// on little-endian builds the integer view is decoded as big-endian first.
double
InStream_Read_F64_IMP(InStream *self) {
    union { double d; uint64_t u64; } bits;
    char *const raw = (char*)&bits;
    SI_read_bytes(self, raw, sizeof(double));
#ifdef CHY_LITTLE_END
    bits.u64 = NumUtil_decode_bigend_u64(&bits.u64);
#endif
    return bits.d;
}
// Read a compressed 32-bit integer via SI_read_cu32() and return it cast to
// signed.
int32_t
InStream_Read_CI32_IMP(InStream *self) {
    const uint32_t raw = SI_read_cu32(self);
    return (int32_t)raw;
}
// Read a compressed unsigned 32-bit integer; delegates to the inlined
// SI_read_cu32().
uint32_t
InStream_Read_CU32_IMP(InStream *self) {
    const uint32_t value = SI_read_cu32(self);
    return value;
}
// Decode a compressed unsigned 32-bit integer.  Bytes are consumed one at a
// time; each contributes its low 7 bits, with earlier bytes ending up in the
// more significant positions.  A byte whose high bit is clear ends the value.
static CFISH_INLINE uint32_t
SI_read_cu32(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    uint32_t value = 0;
    uint8_t  byte;
    do {
        byte  = SI_read_u8(self, ivars);
        value = (value << 7) | (byte & 0x7f);
    } while ((byte & 0x80) != 0);
    return value;
}
// Read a compressed 64-bit integer via SI_read_cu64() and return it cast to
// signed.
int64_t
InStream_Read_CI64_IMP(InStream *self) {
    const uint64_t raw = SI_read_cu64(self);
    return (int64_t)raw;
}
// Read a compressed unsigned 64-bit integer; delegates to the inlined
// SI_read_cu64().
uint64_t
InStream_Read_CU64_IMP(InStream *self) {
    const uint64_t value = SI_read_cu64(self);
    return value;
}
// Decode a compressed unsigned 64-bit integer.  Bytes are consumed one at a
// time; each contributes its low 7 bits, with earlier bytes ending up in the
// more significant positions.  A byte whose high bit is clear ends the value.
static CFISH_INLINE uint64_t
SI_read_cu64(InStream *self) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    uint64_t value = 0;
    uint8_t  byte;
    do {
        byte  = SI_read_u8(self, ivars);
        value = (value << 7) | (byte & 0x7f);
    } while ((byte & 0x80) != 0);
    return value;
}
// Copy the raw, still-encoded bytes of one compressed integer into `buf`
// without decoding them.  Bytes are copied up to and including the first one
// whose high bit is clear.
//
// Returns the number of bytes written.  No bound is checked here, so `buf`
// must be large enough for the encoded value.
int
InStream_Read_Raw_C64_IMP(InStream *self, char *buf) {
    InStreamIVARS *const ivars = InStream_IVARS(self);
    uint8_t *const start = (uint8_t*)buf;
    uint8_t *cursor = start;
    uint8_t  byte;
    do {
        byte = SI_read_u8(self, ivars);
        *cursor++ = byte;
    } while ((byte & 0x80) != 0);
    return (int)(cursor - start);
}