/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file core_filters.c
* @brief Core input/output network filters.
*/
#include "apr.h"
#include "apr_strings.h"
#include "apr_lib.h"
#include "apr_fnmatch.h"
#include "apr_hash.h"
#include "apr_thread_proc.h" /* for RLIMIT stuff */
#include "apr_version.h"
#define APR_WANT_IOVEC
#define APR_WANT_STRFUNC
#define APR_WANT_MEMFUNC
#include "apr_want.h"
#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "http_core.h"
#include "http_protocol.h" /* For index_of_response(). Grump. */
#include "http_request.h"
#include "http_vhost.h"
#include "http_main.h" /* For the default_handler below... */
#include "http_log.h"
#include "util_md5.h"
#include "http_connection.h"
#include "apr_buckets.h"
#include "util_filter.h"
#include "util_ebcdic.h"
#include "mpm_common.h"
#include "scoreboard.h"
#include "mod_core.h"
#include "ap_listen.h"
#include "mod_so.h" /* for ap_find_loaded_module_symbol */
#define AP_MIN_SENDFILE_BYTES (256)
/**
 * Remove all zero-length data buckets from the brigade
 * (metadata buckets are preserved).
 */
#define BRIGADE_NORMALIZE(b) \
do { \
apr_bucket *e = APR_BRIGADE_FIRST(b); \
do { \
if (e->length == 0 && !APR_BUCKET_IS_METADATA(e)) { \
apr_bucket *d; \
d = APR_BUCKET_NEXT(e); \
apr_bucket_delete(e); \
e = d; \
} \
else { \
e = APR_BUCKET_NEXT(e); \
} \
} while (!APR_BRIGADE_EMPTY(b) && (e != APR_BRIGADE_SENTINEL(b))); \
} while (0)
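/* Illustration (not part of the original source): a brigade laid out as
 *
 *     [HEAP len=0] [SOCKET] [FLUSH]
 *
 * normalizes to [SOCKET] [FLUSH]; only zero-length *data* buckets are
 * removed, metadata buckets such as FLUSH are kept.
 */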
/* we know core's module_index is 0 */
#undef APLOG_MODULE_INDEX
#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
struct core_output_filter_ctx {
apr_bucket_brigade *buffered_bb;
apr_pool_t *deferred_write_pool;
apr_size_t bytes_written;
struct iovec *vec;
apr_size_t nvec;
};
apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
ap_input_mode_t mode, apr_read_type_e block,
apr_off_t readbytes)
{
apr_status_t rv;
core_net_rec *net = f->ctx;
core_ctx_t *ctx = net->in_ctx;
const char *str;
apr_size_t len;
if (mode == AP_MODE_INIT) {
/*
* this mode is for filters that might need to 'initialize'
* a connection before reading request data from a client.
* NNTP over SSL for example needs to handshake before the
* server sends the welcome message.
* such filters would have changed the mode before this point
* is reached. however, protocol modules such as NNTP should
* not need to know anything about SSL. given the example, if
* SSL is not in the filter chain, AP_MODE_INIT is a noop.
*/
return APR_SUCCESS;
}
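    /* A minimal sketch (an assumption, not code from this file) of how a
     * protocol handler might drive AP_MODE_INIT so that a filter like
     * mod_ssl can handshake before the first real read:
     *
     *     apr_bucket_brigade *tmp = apr_brigade_create(c->pool,
     *                                                  c->bucket_alloc);
     *     rv = ap_get_brigade(c->input_filters, tmp, AP_MODE_INIT,
     *                         APR_BLOCK_READ, 0);
     *
     * If no filter in the chain handles AP_MODE_INIT, the call falls
     * through to the no-op above.
     */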
if (!ctx)
{
net->in_ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx));
ctx->b = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
ctx->tmpbb = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
/* seed the brigade with the client socket. */
rv = ap_run_insert_network_bucket(f->c, ctx->b, net->client_socket);
if (rv != APR_SUCCESS)
return rv;
}
else if (APR_BRIGADE_EMPTY(ctx->b)) {
return APR_EOF;
}
    /* ### This is bad: it rescans the whole brigade on every call. */
BRIGADE_NORMALIZE(ctx->b);
/* check for empty brigade again *AFTER* BRIGADE_NORMALIZE()
* If we have lost our socket bucket (see above), we are EOF.
*
* Ideally, this should be returning SUCCESS with EOS bucket, but
* some higher-up APIs (spec. read_request_line via ap_rgetline)
* want an error code. */
if (APR_BRIGADE_EMPTY(ctx->b)) {
return APR_EOF;
}
if (mode == AP_MODE_GETLINE) {
/* we are reading a single LF line, e.g. the HTTP headers */
rv = apr_brigade_split_line(b, ctx->b, block, HUGE_STRING_LEN);
/* We should treat EAGAIN here the same as we do for EOF (brigade is
* empty). We do this by returning whatever we have read. This may
* or may not be bogus, but is consistent (for now) with EOF logic.
*/
if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
rv = APR_SUCCESS;
}
return rv;
}
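    /* For example (a caller-side sketch, assuming ap_rgetline() behaves as
     * in other httpd releases), request and header lines are fetched
     * through this branch roughly as:
     *
     *     rv = ap_get_brigade(r->proto_input_filters, bb, AP_MODE_GETLINE,
     *                         APR_BLOCK_READ, 0);
     *
     * which leaves everything up to and including the first LF (at most
     * HUGE_STRING_LEN bytes) in bb.
     */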
    /* ### AP_MODE_EATCRLF is a somewhat misleading name for this mode:
     * besides eating CRLFs, we also effectively PEEK at whether more data
     * is available.  Determine whether anyone actually uses this or not. */
if (mode == AP_MODE_EATCRLF) {
apr_bucket *e;
const char *c;
/* The purpose of this loop is to ignore any CRLF (or LF) at the end
* of a request. Many browsers send extra lines at the end of POST
* requests. We use the PEEK method to determine if there is more
* data on the socket, so that we know if we should delay sending the
* end of one request until we have served the second request in a
* pipelined situation. We don't want to actually delay sending a
         * response if the server finds a CRLF (or LF), because that doesn't
* mean that there is another request, just a blank line.
*/
while (1) {
if (APR_BRIGADE_EMPTY(ctx->b))
return APR_EOF;
e = APR_BRIGADE_FIRST(ctx->b);
rv = apr_bucket_read(e, &str, &len, APR_NONBLOCK_READ);
if (rv != APR_SUCCESS)
return rv;
c = str;
while (c < str + len) {
if (*c == APR_ASCII_LF)
c++;
                /* Bounds-checked so we never read past str + len. */
                else if (*c == APR_ASCII_CR && c + 1 < str + len
                         && *(c + 1) == APR_ASCII_LF)
c += 2;
else
return APR_SUCCESS;
}
            /* If we reach here, the bucket contained nothing but CRLFs,
             * so just toss it. */
/* FIXME: Is this the right thing to do in the core? */
apr_bucket_delete(e);
}
return APR_SUCCESS;
}
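    /* Hedged usage sketch: older httpd releases probed for a pipelined
     * request (e.g. check_pipeline_flush() in http_request.c) with a
     * non-blocking EATCRLF read:
     *
     *     rv = ap_get_brigade(c->input_filters, bb, AP_MODE_EATCRLF,
     *                         APR_NONBLOCK_READ, 0);
     *
     * APR_SUCCESS means real (non-CRLF) data is pending, so flushing the
     * current response can be deferred; APR_EOF/APR_EAGAIN means there is
     * nothing more to read right now.
     */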
/* If mode is EXHAUSTIVE, we want to just read everything until the end
* of the brigade, which in this case means the end of the socket.
* To do this, we attach the brigade that has currently been setaside to
* the brigade that was passed down, and send that brigade back.
*
* NOTE: This is VERY dangerous to use, and should only be done with
 * extreme caution. FWIW, this would be needed by an MPM like Perchild;
* such an MPM can easily request the socket and all data that has been
* read, which means that it can pass it to the correct child process.
*/
if (mode == AP_MODE_EXHAUSTIVE) {
apr_bucket *e;
/* Tack on any buckets that were set aside. */
APR_BRIGADE_CONCAT(b, ctx->b);
        /* Since we've just added all potential buckets (most likely just
         * the socket bucket), we have read everything there is to read:
         * this is the end, so tack on an EOS bucket too. */
e = apr_bucket_eos_create(f->c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(b, e);
return APR_SUCCESS;
}
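    /* Illustrative only: an MPM handing off the connection could reclaim
     * everything, socket bucket included, with
     *
     *     rv = ap_get_brigade(c->input_filters, bb, AP_MODE_EXHAUSTIVE,
     *                         APR_BLOCK_READ, 0);
     *
     * leaving in bb any setaside data, the socket bucket, and the
     * trailing EOS added above.
     */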
/* read up to the amount they specified. */
if (mode == AP_MODE_READBYTES || mode == AP_MODE_SPECULATIVE) {
apr_bucket *e;
AP_DEBUG_ASSERT(readbytes > 0);
e = APR_BRIGADE_FIRST(ctx->b);
rv = apr_bucket_read(e, &str, &len, block);
if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
/* getting EAGAIN for a blocking read is an error; for a
* non-blocking read, return an empty brigade. */
return APR_SUCCESS;
}
else if (rv != APR_SUCCESS) {
return rv;
}
else if (block == APR_BLOCK_READ && len == 0) {
/* We wanted to read some bytes in blocking mode. We read
* 0 bytes. Hence, we now assume we are EOS.
*
* When we are in normal mode, return an EOS bucket to the
* caller.
* When we are in speculative mode, leave ctx->b empty, so
* that the next call returns an EOS bucket.
*/
apr_bucket_delete(e);
if (mode == AP_MODE_READBYTES) {
e = apr_bucket_eos_create(f->c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(b, e);
}
return APR_SUCCESS;
}
/* Have we read as much data as we wanted (be greedy)? */
if (len < readbytes) {
apr_size_t bucket_len;
rv = APR_SUCCESS;
            /* The data in bucket e is already accounted for in len */
e = APR_BUCKET_NEXT(e);
while ((len < readbytes) && (rv == APR_SUCCESS)
&& (e != APR_BRIGADE_SENTINEL(ctx->b))) {
/* Check for the availability of buckets with known length */
if (e->length != (apr_size_t)-1) {
len += e->length;
e = APR_BUCKET_NEXT(e);
}
else {
                    /*
                     * Read from the bucket, but non-blocking: if there is
                     * no more data, that is fine too; we already have some
                     * and are only checking whether there is more, not
                     * waiting for it.
                     */
rv = apr_bucket_read(e, &str, &bucket_len,
APR_NONBLOCK_READ);
if (rv == APR_SUCCESS) {
len += bucket_len;
e = APR_BUCKET_NEXT(e);
}
}
}
}
/* We can only return at most what we read. */
if (len < readbytes) {
readbytes = len;
}
rv = apr_brigade_partition(ctx->b, readbytes, &e);
if (rv != APR_SUCCESS) {
return rv;
}
        /* Must move the buckets after the split point into tmpbb before
         * the CONCAT below */
ctx->tmpbb = apr_brigade_split_ex(ctx->b, e, ctx->tmpbb);
if (mode == AP_MODE_READBYTES) {
APR_BRIGADE_CONCAT(b, ctx->b);
}
else if (mode == AP_MODE_SPECULATIVE) {
apr_bucket *copy_bucket;
for (e = APR_BRIGADE_FIRST(ctx->b);
e != APR_BRIGADE_SENTINEL(ctx->b);
e = APR_BUCKET_NEXT(e))
{
rv = apr_bucket_copy(e, &copy_bucket);
if (rv != APR_SUCCESS) {
return rv;
}
APR_BRIGADE_INSERT_TAIL(b, copy_bucket);
}
}
/* Take what was originally there and place it back on ctx->b */
APR_BRIGADE_CONCAT(ctx->b, ctx->tmpbb);
}
return APR_SUCCESS;
}
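/* Usage sketch (hypothetical caller, not code from this file): a filter
 * can peek at the next few bytes without consuming them, e.g.
 *
 *     rv = ap_get_brigade(f->next, bb, AP_MODE_SPECULATIVE,
 *                         APR_NONBLOCK_READ, 4);
 *
 * Because the SPECULATIVE branch above only copies buckets out of ctx->b,
 * a subsequent AP_MODE_READBYTES read returns the same bytes again, this
 * time actually removing them from ctx->b.
 */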
static void setaside_remaining_output(ap_filter_t *f,
core_output_filter_ctx_t *ctx,
apr_bucket_brigade *bb,
conn_rec *c);
static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
apr_bucket_brigade *bb,
core_output_filter_ctx_t *ctx,
conn_rec *c);
static apr_status_t writev_nonblocking(apr_socket_t *s,
apr_bucket_brigade *bb,
core_output_filter_ctx_t *ctx,
apr_size_t bytes_to_write,
apr_size_t nvec,
conn_rec *c);
#if APR_HAS_SENDFILE
static apr_status_t sendfile_nonblocking(apr_socket_t *s,
apr_bucket *bucket,
core_output_filter_ctx_t *ctx,
conn_rec *c);
#endif
/* XXX: Should this be a configurable parameter? */
#define THRESHOLD_MIN_WRITE 4096
/* Optional function from mod_logio, used to account for output traffic. */
extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *ap__logio_add_bytes_out;
static int should_send_brigade(apr_bucket_brigade *bb, conn_rec *c, int *flush)
{
core_server_config *conf =
ap_get_core_module_config(c->base_server->module_config);
apr_size_t total_bytes = 0, non_file_bytes = 0;
apr_uint32_t eor_buckets = 0;
apr_bucket *bucket;
int need_flush = 0;
/* Scan through the brigade and decide whether we need to flush it,
* based on the following rules:
*
     * a) The brigade contains a flush bucket: Do a blocking write
     *    of everything up to that point.
*
* b) The request is in CONN_STATE_HANDLER state, and the brigade
* contains at least flush_max_threshold bytes in non-file
* buckets: Do blocking writes until the amount of data in the
* buffer is less than flush_max_threshold. (The point of this
* rule is to provide flow control, in case a handler is
* streaming out lots of data faster than the data can be
* sent to the client.)
*
* c) The request is in CONN_STATE_HANDLER state, and the brigade
* contains at least flush_max_pipelined EOR buckets:
* Do blocking writes until less than flush_max_pipelined EOR
* buckets are left. (The point of this rule is to prevent too many
* FDs being kept open by pipelined requests, possibly allowing a
* DoS).
*
     * d) The brigade contains a morphing bucket: Do a blocking write,
     *    since otherwise ap_save_brigade() could read the whole bucket
     *    into memory when setting it aside.
*/
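    /* Worked example with hypothetical numbers: with flush_max_threshold
     * at, say, 64K, a brigade of [HEAP 16K][FILE 1M][HEAP 16K] yields
     * total_bytes = 1M + 32K but non_file_bytes = 32K, so rule (b) does
     * not trigger; the function still returns "send" below because
     * total_bytes > non_file_bytes (i.e. there is a FILE bucket whose
     * bytes are better sent now, possibly via sendfile, than set aside).
     */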
for (bucket = APR_BRIGADE_FIRST(bb);
bucket != APR_BRIGADE_SENTINEL(bb);
bucket = APR_BUCKET_NEXT(bucket)) {
if (!APR_BUCKET_IS_METADATA(bucket)) {
if (bucket->length == (apr_size_t)-1) {
if (flush) {
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
"core_output_filter: flushing because "
"of morphing bucket");
}
need_flush = 1;
break;
}
total_bytes += bucket->length;
if (!APR_BUCKET_IS_FILE(bucket)) {
non_file_bytes += bucket->length;
if (non_file_bytes > conf->flush_max_threshold) {
if (flush) {
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
"core_output_filter: flushing because "
"of max threshold");
}
need_flush = 1;
break;
}
}
}
else if (APR_BUCKET_IS_FLUSH(bucket)) {
if (flush) {
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
"core_output_filter: flushing because "
"of FLUSH bucket");
}
need_flush = 1;
break;
}
else if (AP_BUCKET_IS_EOR(bucket)
&& conf->flush_max_pipelined >= 0
&& ++eor_buckets > conf->flush_max_pipelined) {
if (flush) {
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
"core_output_filter: flushing because "
"of max pipelined");
}
need_flush = 1;
break;
}
}
if (flush) {
*flush = need_flush;
}
    /* Also send if at/above THRESHOLD_MIN_WRITE, or if there are FILE buckets */
return (need_flush
|| total_bytes >= THRESHOLD_MIN_WRITE
|| total_bytes > non_file_bytes);
}
apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb)
{
conn_rec *c = f->c;
core_net_rec *net = f->ctx;
core_output_filter_ctx_t *ctx = net->out_ctx;
apr_bucket_brigade *bb = NULL;
apr_status_t rv = APR_SUCCESS;
/* Fail quickly if the connection has already been aborted. */
if (c->aborted) {
if (new_bb != NULL) {
apr_brigade_cleanup(new_bb);
}
return APR_ECONNABORTED;
}
if (ctx == NULL) {
ctx = apr_pcalloc(c->pool, sizeof(*ctx));
net->out_ctx = (core_output_filter_ctx_t *)ctx;
/*
* Need to create buffered_bb brigade with correct lifetime. Passing
* NULL to ap_save_brigade() would result in a brigade
* allocated from bb->pool which might be wrong.
*/
ctx->buffered_bb = apr_brigade_create(c->pool, c->bucket_alloc);
}
if (new_bb != NULL)
bb = new_bb;
if ((ctx->buffered_bb != NULL) &&
!APR_BRIGADE_EMPTY(ctx->buffered_bb)) {
if (new_bb != NULL) {
APR_BRIGADE_PREPEND(bb, ctx->buffered_bb);
}
else {
bb = ctx->buffered_bb;
}
}
else if (new_bb == NULL) {
c->data_in_output_filters = 0;
return APR_SUCCESS;
}
if (!new_bb || should_send_brigade(bb, c, NULL)) {
apr_socket_t *sock = net->client_socket;
apr_interval_time_t sock_timeout = 0;
/* Non-blocking writes on the socket in any case. */
apr_socket_timeout_get(sock, &sock_timeout);
apr_socket_timeout_set(sock, 0);
do {
rv = send_brigade_nonblocking(sock, bb, ctx, c);
if (new_bb && APR_STATUS_IS_EAGAIN(rv)) {
/* Scan through the brigade and decide whether we must absolutely
* flush the remaining data, based on should_send_brigade() &flush
* rules. If so, wait for writability and retry, otherwise we did
* our best already and can wait for the next call.
*/
int flush;
(void)should_send_brigade(bb, c, &flush);
if (flush) {
apr_int32_t nfd;
apr_pollfd_t pfd;
memset(&pfd, 0, sizeof(pfd));
pfd.reqevents = APR_POLLOUT;
pfd.desc_type = APR_POLL_SOCKET;
pfd.desc.s = sock;
pfd.p = c->pool;
do {
rv = apr_poll(&pfd, 1, &nfd, sock_timeout);
} while (APR_STATUS_IS_EINTR(rv));
}
}
} while (rv == APR_SUCCESS && !APR_BRIGADE_EMPTY(bb));
/* Restore original socket timeout before leaving. */
apr_socket_timeout_set(sock, sock_timeout);
}
if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) {
/* The client has aborted the connection */
ap_log_cerror(
APLOG_MARK, APLOG_TRACE1, rv, c,
"core_output_filter: writing data to the network");
        /*
         * Set c->aborted before calling apr_brigade_cleanup() so that the
         * request is logged with the correct status: apr_brigade_cleanup()
         * triggers the logging of the request if the brigade contains an
         * EOR bucket.
         */
c->aborted = 1;
apr_brigade_cleanup(bb);
return rv;
}
setaside_remaining_output(f, ctx, bb, c);
return APR_SUCCESS;
}
/*
* This function assumes that either ctx->buffered_bb == NULL, or
* ctx->buffered_bb is empty, or ctx->buffered_bb == bb
*/
static void setaside_remaining_output(ap_filter_t *f,
core_output_filter_ctx_t *ctx,
apr_bucket_brigade *bb,
conn_rec *c)
{
apr_bucket *bucket;
/* Don't set aside leading empty buckets, all previous data have been
* consumed so it's safe to delete them now.
*/
while (((bucket = APR_BRIGADE_FIRST(bb)) != APR_BRIGADE_SENTINEL(bb)) &&
(APR_BUCKET_IS_METADATA(bucket) || (bucket->length == 0))) {
apr_bucket_delete(bucket);
}
c->data_in_output_filters = 0;
if (!APR_BRIGADE_EMPTY(bb)) {
c->data_in_output_filters = 1;
if (bb != ctx->buffered_bb) {
if (!ctx->deferred_write_pool) {
apr_pool_create(&ctx->deferred_write_pool, c->pool);
apr_pool_tag(ctx->deferred_write_pool, "deferred_write");
}
ap_save_brigade(f, &(ctx->buffered_bb), &bb,
ctx->deferred_write_pool);
}
}
else if (ctx->deferred_write_pool) {
/*
* There are no more requests in the pipeline. We can just clear the
* pool.
*/
apr_pool_clear(ctx->deferred_write_pool);
}
}
#ifndef APR_MAX_IOVEC_SIZE
#define NVEC_MIN 16
#define NVEC_MAX NVEC_MIN
#else
#if APR_MAX_IOVEC_SIZE > 16
#define NVEC_MIN 16
#else
#define NVEC_MIN APR_MAX_IOVEC_SIZE
#endif
#define NVEC_MAX APR_MAX_IOVEC_SIZE
#endif
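/* Illustration: starting from ctx->nvec == 0, the iovec array in
 * send_brigade_nonblocking() grows 0 -> NVEC_MIN (16) -> 32 -> 64 -> ...
 * capped at NVEC_MAX (APR_MAX_IOVEC_SIZE where defined), so a brigade of
 * many small in-memory buckets is coalesced into few writev calls.
 */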
static APR_INLINE int is_in_memory_bucket(apr_bucket *b)
{
/* These buckets' data are already in memory. */
return APR_BUCKET_IS_HEAP(b)
|| APR_BUCKET_IS_POOL(b)
|| APR_BUCKET_IS_TRANSIENT(b)
|| APR_BUCKET_IS_IMMORTAL(b);
}
#if APR_HAS_SENDFILE
static APR_INLINE int can_sendfile_bucket(apr_bucket *b)
{
/* Use sendfile to send the bucket unless:
* - the bucket is not a file bucket, or
* - the file is too small for sendfile to be useful, or
* - sendfile is disabled in the httpd config via "EnableSendfile off".
*/
if (APR_BUCKET_IS_FILE(b) && b->length >= AP_MIN_SENDFILE_BYTES) {
apr_file_t *file = ((apr_bucket_file *)b->data)->fd;
return apr_file_flags_get(file) & APR_SENDFILE_ENABLED;
}
else {
return 0;
}
}
#endif
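/* Note (an assumption about core's file handling, for context only):
 * APR_SENDFILE_ENABLED is expected to have been set when the file was
 * opened under "EnableSendfile on", along the lines of:
 *
 *     apr_file_open(&fd, path,
 *                   APR_READ | APR_BINARY | APR_SENDFILE_ENABLED,
 *                   APR_OS_DEFAULT, r->pool);
 */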
#if defined(WIN32) && (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION <= 7)
#undef APR_TCP_NOPUSH_FLAG
#define APR_TCP_NOPUSH_FLAG 0
#endif
static APR_INLINE void sock_nopush(apr_socket_t *s, int to)
{
    /* Disable TCP_NOPUSH handling on OSX, since unsetting it does not push
     * retained data there; that can introduce delays if no further data
     * arrive soon enough, or cause the last chunk to be sent only when the
     * connection is shut down (e.g. after KeepAliveTimeout).
     */
#if APR_TCP_NOPUSH_FLAG && !defined(__APPLE__)
(void)apr_socket_opt_set(s, APR_TCP_NOPUSH, to);
#endif
}
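/* Usage pattern (as used by send_brigade_nonblocking() below): cork the
 * socket before a batch of writes and uncork it afterwards so the kernel
 * may coalesce segments:
 *
 *     sock_nopush(s, 1);
 *     rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
 *     ...
 *     sock_nopush(s, 0);  (pushes any retained data)
 */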
static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
apr_bucket_brigade *bb,
core_output_filter_ctx_t *ctx,
conn_rec *c)
{
apr_status_t rv = APR_SUCCESS;
core_server_config *conf =
ap_get_core_module_config(c->base_server->module_config);
apr_size_t nvec = 0, nbytes = 0;
apr_bucket *bucket, *next;
const char *data;
apr_size_t length;
for (bucket = APR_BRIGADE_FIRST(bb);
bucket != APR_BRIGADE_SENTINEL(bb);
bucket = next) {
next = APR_BUCKET_NEXT(bucket);
#if APR_HAS_SENDFILE
if (can_sendfile_bucket(bucket)) {
if (nvec > 0) {
sock_nopush(s, 1);
rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
if (rv != APR_SUCCESS) {
goto cleanup;
}
nbytes = 0;
nvec = 0;
}
rv = sendfile_nonblocking(s, bucket, ctx, c);
if (rv != APR_SUCCESS) {
goto cleanup;
}
continue;
}
#endif /* APR_HAS_SENDFILE */
if (bucket->length) {
/* Non-blocking read first, in case this is a morphing
* bucket type. */
rv = apr_bucket_read(bucket, &data, &length, APR_NONBLOCK_READ);
if (APR_STATUS_IS_EAGAIN(rv)) {
                /* Read would block: flush any pending output, then retry
                 * the read, blocking this time. */
if (nvec) {
rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
if (rv != APR_SUCCESS) {
goto cleanup;
}
nbytes = 0;
nvec = 0;
}
sock_nopush(s, 0);
rv = apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ);
}
if (rv != APR_SUCCESS) {
goto cleanup;
}
/* reading may have split the bucket, so recompute next: */
next = APR_BUCKET_NEXT(bucket);
}
if (!bucket->length) {
/* Don't delete empty buckets until all the previous ones have been
* sent (nvec == 0); this must happen in sequence since metabuckets
* like EOR could free the data still pointed to by the iovec. So
* unless the latter is empty, let writev_nonblocking() cleanup the
* brigade in order.
*/
if (!nvec) {
apr_bucket_delete(bucket);
}
continue;
}
        /* Make sure this new chunk of data fits in our iovec. */
if (nvec == ctx->nvec) {
if (nvec == NVEC_MAX) {
sock_nopush(s, 1);
rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
if (rv != APR_SUCCESS) {
goto cleanup;
}
nbytes = 0;
nvec = 0;
}
else {
struct iovec *newvec;
apr_size_t newn = nvec * 2;
if (newn < NVEC_MIN) {
newn = NVEC_MIN;
}
else if (newn > NVEC_MAX) {
newn = NVEC_MAX;
}
newvec = apr_palloc(c->pool, newn * sizeof(struct iovec));
if (nvec) {
memcpy(newvec, ctx->vec, nvec * sizeof(struct iovec));
}
ctx->vec = newvec;
ctx->nvec = newn;
}
}
nbytes += length;
ctx->vec[nvec].iov_base = (void *)data;
ctx->vec[nvec].iov_len = length;
nvec++;
/* Flush above max threshold, unless the brigade still contains in
* memory buckets which we want to try writing in the same pass (if
* we are at the end of the brigade, the write will happen outside
* the loop anyway).
*/
if (nbytes > conf->flush_max_threshold
&& next != APR_BRIGADE_SENTINEL(bb)
&& !is_in_memory_bucket(next)) {
sock_nopush(s, 1);
rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
if (rv != APR_SUCCESS) {
goto cleanup;
}
nbytes = 0;
nvec = 0;
}
}
if (nvec > 0) {
rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
}
cleanup:
sock_nopush(s, 0);
return rv;
}
static apr_status_t writev_nonblocking(apr_socket_t *s,
apr_bucket_brigade *bb,
core_output_filter_ctx_t *ctx,
apr_size_t bytes_to_write,
apr_size_t nvec,
conn_rec *c)
{
apr_status_t rv;
struct iovec *vec = ctx->vec;
apr_size_t bytes_written = 0;
apr_size_t i, offset = 0;
do {
apr_size_t n = 0;
rv = apr_socket_sendv(s, vec + offset, nvec - offset, &n);
bytes_written += n;
for (i = offset; i < nvec; ) {
apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
if (!bucket->length) {
apr_bucket_delete(bucket);
}
else if (n >= vec[i].iov_len) {
apr_bucket_delete(bucket);
n -= vec[i++].iov_len;
offset++;
}
else {
if (n) {
apr_bucket_split(bucket, n);
apr_bucket_delete(bucket);
vec[i].iov_len -= n;
vec[i].iov_base = (char *) vec[i].iov_base + n;
}
break;
}
}
} while (rv == APR_SUCCESS && bytes_written < bytes_to_write);
if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
ap__logio_add_bytes_out(c, bytes_written);
}
ctx->bytes_written += bytes_written;
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
"writev_nonblocking: %"APR_SIZE_T_FMT"/%"APR_SIZE_T_FMT,
bytes_written, bytes_to_write);
return rv;
}
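/* Worked example (hypothetical sizes): with vec = {4096, 8192} and
 * apr_socket_sendv() reporting n = 6000, the loop above deletes the first
 * bucket (fully sent), splits the second at 6000 - 4096 = 1904, deletes
 * the sent front part and advances vec[1] by 1904 bytes; the next
 * iteration then retries with vec + 1 for the remaining 6288 bytes, until
 * everything is written or the socket returns EAGAIN.
 */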
#if APR_HAS_SENDFILE
static apr_status_t sendfile_nonblocking(apr_socket_t *s,
apr_bucket *bucket,
core_output_filter_ctx_t *ctx,
conn_rec *c)
{
apr_status_t rv;
apr_file_t *file = ((apr_bucket_file *)bucket->data)->fd;
apr_size_t bytes_written = bucket->length; /* bytes_to_write for now */
apr_off_t file_offset = bucket->start;
rv = apr_socket_sendfile(s, file, NULL, &file_offset, &bytes_written, 0);
if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
ap__logio_add_bytes_out(c, bytes_written);
}
ctx->bytes_written += bytes_written;
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
"sendfile_nonblocking: %" APR_SIZE_T_FMT "/%" APR_SIZE_T_FMT,
bytes_written, bucket->length);
if (bytes_written >= bucket->length) {
apr_bucket_delete(bucket);
}
else if (bytes_written > 0) {
apr_bucket_split(bucket, bytes_written);
apr_bucket_delete(bucket);
if (rv == APR_SUCCESS) {
rv = APR_EAGAIN;
}
}
return rv;
}
#endif