blob: 6b3b788c06b631392055a8532a2303d7f08f4304 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
\brief An example implementation of the Sofa Stream Handler APIs
defined in uima/sofastreamhandler.hpp
to support the file URI scheme
Description: This implements a SofaStreamHandler for a file URI scheme.
The file to be read is specified as an argument to
openStream(char * uriString). uriString is expected to
contain the URI scheme and the path to the file, as follows:
eg: file://C:\myfile.txt on Windows
file:///home/myid/myfile.txt on UNIX
To build the SofaStreamHandlerFile DLL:
On linux:
make -f SofaStreamHandlerFile.mak
On Windows:
devenv SofaStreamHandlerFile.vcproj /build Release
The handler dll must be registered with the UIMA framework
by setting an environment variable as follows:
On Linux:
export UIMACPP_STREAMHANDLERS=file:SofaStreamHandlerFile
On Windows:
set UIMACPP_STREAMHANDLERS=file:SofaStreamHandlerFile
-------------------------------------------------------------------------- */
#include <uima/api.hpp>
#include <uima/sofastreamhandler.hpp>
#include <sys/stat.h>
#if defined(__OS_UNIX__)
#define LSEEK lseek
#include <unistd.h>
#elif defined(__OS_WIN32__)
#define LSEEK _lseeki64
#include <io.h>
#include <stdio.h>
using namespace uima;
using namespace std;
// SofaStreamHandler implementation that reads Sofa data from a local file
// named by a "file://" URI.
class SofaStreamHandlerFile : public SofaStreamHandler {
// stdio handle assigned from fopen() in openStream()
FILE * stream;
// array allocated with new[] in openStream() and passed to setvbuf();
// deleted in closeStream()
char * buffer;
// st_size reported by stat() for the opened file
INT64 filesize;
// number of bytes allocated for buffer
size_t buflen;
// position bookkeeping written by getNext() and seek()
// NOTE(review): no initialization of these members is visible in this
// part of the file - confirm they are set before first use.
INT64 cur_pos ;
// openStream
// @param - uriString in UTF-8
// Convenience overload: opens the stream with the C library's default
// buffer size (BUFSIZ) as the minimum internal buffer size.
// @param uriString  URI of the file to open, in UTF-8
void openStream(const char * uriString) {
  const size_t defaultBufferSize = BUFSIZ;
  openStream(uriString, defaultBufferSize);
}
// openStreamWithMinInternalBufSize
// open the file for reading and allocate an internal buffer of at least the
// specified size.
// @param - uriString in UTF-8
// minimumBufferSize
// Opens the file named by a "file://" URI for binary reading.
// Steps visible here: log the call, allocate an internal buffer of
// minimumBufferSize bytes, fopen() the path that follows the 7-character
// "file://" prefix, install the buffer with setvbuf(), and record the
// file size obtained from stat().
// @param uriString          URI in UTF-8; only handled when it starts with "file://"
// @param minimumBufferSize  number of bytes to allocate for the internal buffer
void openStream (const char * uriString, size_t minimumBufferSize) {
// Trace output written to stdout on every call.
cout << "SofaStreamHandlerFile: openStream() " << uriString << endl;
if (strncmp(uriString, "file://", 7) == 0) {
// NOTE(review): the buffer is allocated before fopen() is attempted; no
// release is visible in this block if the open fails - confirm.
buffer = new char[minimumBufferSize];
buflen = minimumBufferSize;
// Everything after the "file://" prefix is used verbatim as the path.
const char * filename = uriString+7;
stream = fopen(filename,"rb");
if (stream == NULL) {
// NOTE(review): this line looks like a fragment of a larger error-reporting
// statement that is not visible here. The message text refers to
// "SegmentDelimiter", which appears unrelated to a failed fopen() - confirm.
ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Invalid value for SegmentDelimiter"),
} else {
// Use the freshly allocated array as the stream's fully buffered I/O buffer.
int rc = setvbuf (stream, buffer, _IOFBF, buflen);
if (rc==0) {
// NOTE(review): the return value of stat() is not checked, so fstat may be
// read without having been filled in - confirm.
struct stat fstat;
stat(filename, &fstat);
filesize = fstat.st_size;
} else {
// NOTE(review): same message text as the fopen() failure path above;
// it does not describe a setvbuf() failure - confirm.
ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Invalid value for SegmentDelimiter"),
// NOTE(review): cur_pos is not reset anywhere in this block - confirm it is
// initialized elsewhere before getNext()/howManyAvailable() are used.
return ;
// getTotalStreamSize
// @return total size of the data
// Reports the stream size recorded in the filesize member.
// @return current value of filesize
INT64 getTotalStreamSize() {
  const INT64 totalBytes = filesize;
  return totalBytes;
}
// getBufferSize
// @return buffer length
// Reports the length recorded for the internal buffer.
// @return current value of buflen
size_t getBufferSize() {
  const size_t internalBufferLength = buflen;
  return internalBufferLength;
}
// howManyAvailable
// @return number of bytes available for read
// -1 indicates EOF
// Computes how much data lies between the tracked position and the
// recorded file size.
// @return filesize - cur_pos, or -1 when that difference is negative
INT64 howManyAvailable() {
  const INT64 remaining = filesize - cur_pos;
  if (remaining >= 0) {
    return remaining;
  }
  return -1;
}
// getNext - deliver the specified number of bytes in network
// byte order.
// reads the specified number of bytes from the
// current position. Advances the current position
// by the number of bytes read. This implementation
// assumes that the data in the file is in network
// byte order.
// This call blocks till read request is satisfied or
// EOF is reached.
// @param - numBytes, the number of bytes to read
// - pBuffer, buffer into which the bytes are to be copied.
// @returns number of bytes actually copied into buffer.
// This may be less than the number of
// bytes requested.
// -1 indicates EOF.
// Reads up to numBytes bytes from the current position into pBuffer and
// advances the tracked position by the number of bytes actually read.
// @param numBytes  number of bytes requested
// @param pBuffer   destination buffer; must hold at least numBytes bytes
// @return number of bytes copied (may be fewer than requested), or -1 when
//         no data can be delivered: the stream is not open, pBuffer is
//         NULL, or a non-empty request read nothing (end of file / error)
INT64 getNext(size_t numBytes,
              void * pBuffer) {
  cout << "SofaStreamHandlerFile: getNext() " << numBytes << endl;

  // Guard against use before openStream() / after closeStream() and against
  // a missing destination; feof()/fread() on NULL is undefined behaviour.
  if (stream == NULL || pBuffer == NULL) {
    return -1;
  }
  if (feof(stream)) {
    return -1;
  }

  const size_t numread = fread(pBuffer, 1, numBytes, stream);
  cur_pos += static_cast<INT64>(numread);

  // feof() only turns true after a read has hit the end of the file, so the
  // first call made exactly at EOF reads 0 bytes. Report that as -1, as the
  // contract documents, instead of returning 0 and requiring one more call.
  if (numread == 0 && numBytes > 0) {
    return -1;
  }
  return static_cast<INT64>(numread);
}
// seek
// sets the position within the current stream.
// @param - offset - number of bytes from origin
// - origin is one of the following (taken from lseek spec):
// If SEEK_SET, the position is set to offset bytes.
// If SEEK_CUR, the position is set to its
// current location plus offset bytes.
// If SEEK_END, the position is set to the total length
// plus offset bytes.
// These constants are defined in stdio.h
// @return 0 indicates success
// Repositions the stream.
// @param offset  signed number of bytes relative to origin
// @param origin  SEEK_SET, SEEK_CUR or SEEK_END (from stdio.h)
// @return 0 on success, -3 when the stream is not open, origin is not one
//         of the three constants, or the underlying fseek() fails
// On success cur_pos is updated to the new absolute position. On failure
// cur_pos is left unchanged; if a multi-step seek fails part-way the real
// file position may differ from cur_pos.
int seek (INT64 offset, int origin) {
  if (stream == NULL) {
    return -3;
  }

  // Work out the absolute position the request leads to, so that cur_pos
  // tracks a position rather than fseek()'s 0/-1 return code.
  INT64 target = 0;
  switch (origin) {
    case SEEK_SET: target = offset;            break;
    case SEEK_CUR: target = cur_pos + offset;  break;
    case SEEK_END: target = filesize + offset; break;
    default:       return -3;
  }

  // fseek() takes a long, which can be narrower than INT64. Apply the
  // caller's origin only to the first step and continue from the current
  // position for the rest. Negative offsets are chunked the same way.
  INT64 remaining = offset;
  int whence = origin;
  do {
    long step;
    if (remaining > LONG_MAX) {
      step = LONG_MAX;
    } else if (remaining < -LONG_MAX) {
      step = -LONG_MAX;
    } else {
      step = static_cast<long>(remaining);
    }
    if (fseek(stream, step, whence) != 0) {
      return -3;
    }
    remaining -= step;
    whence = SEEK_CUR;
  } while (remaining != 0);

  cur_pos = target;
  return 0;
}
// closeStream
// close the stream. Delete the internal buffer.
// Closes the file and releases the internal buffer.
// Safe to call when nothing is open and safe to call more than once:
// both members are reset to NULL after being released.
void closeStream() {
  // Close the stream first: the buffer was handed to setvbuf() and must
  // stay valid until the stream using it has been closed.
  if (stream != NULL) {
    fclose(stream);
    stream = NULL;
  }
  if (buffer != NULL) {
    delete[] buffer;
    buffer = NULL;
  }
}
// getDataPointer
// This returns a pointer to the data in memory.
// @return NULL.
// This handler does not expose an in-memory view of the data.
// @return always NULL
void * getDataPointer() {
  void * const noInMemoryData = NULL;
  return noInMemoryData;
}