| #include "network_backends.h" |
| |
| #ifdef USE_WRITEV |
| |
| #include <sys/types.h> |
| #include <sys/socket.h> |
| #include <sys/uio.h> |
| #include <sys/stat.h> |
| #include <sys/time.h> |
| #include <sys/resource.h> |
| #include <netinet/in.h> |
| #include <netinet/tcp.h> |
| |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <unistd.h> |
| #include <netdb.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <limits.h> |
| #include <stdio.h> |
| #include <assert.h> |
| |
| #include "network.h" |
| #include "fdevent.h" |
| #include "log.h" |
| #include "stat_cache.h" |
| |
| #if 0 |
| #define LOCAL_BUFFERING 1 |
| #endif |
| |
| int network_write_chunkqueue_writev(server *srv, connection *con, int fd, chunkqueue *cq) { |
| chunk *c; |
| size_t chunks_written = 0; |
| |
| for(c = cq->first; c; c = c->next) { |
| int chunk_finished = 0; |
| |
| switch(c->type) { |
| case MEM_CHUNK: { |
| char * offset; |
| size_t toSend; |
| ssize_t r; |
| |
| size_t num_chunks, i; |
| struct iovec *chunks; |
| chunk *tc; |
| size_t num_bytes = 0; |
| #if defined(_SC_IOV_MAX) /* IRIX, MacOS X, FreeBSD, Solaris, ... */ |
| const size_t max_chunks = sysconf(_SC_IOV_MAX); |
| #elif defined(IOV_MAX) /* Linux x86 (glibc-2.3.6-3) */ |
| const size_t max_chunks = IOV_MAX; |
| #elif defined(MAX_IOVEC) /* Linux ia64 (glibc-2.3.3-98.28) */ |
| const size_t max_chunks = MAX_IOVEC; |
| #elif defined(UIO_MAXIOV) /* Linux x86 (glibc-2.2.5-233) */ |
| const size_t max_chunks = UIO_MAXIOV; |
| #elif (defined(__FreeBSD__) && __FreeBSD_version < 500000) || defined(__DragonFly__) || defined(__APPLE__) |
| /* - FreeBSD 4.x |
| * - MacOS X 10.3.x |
| * (covered in -DKERNEL) |
| * */ |
| const size_t max_chunks = 1024; /* UIO_MAXIOV value from sys/uio.h */ |
| #else |
| #error "sysconf() doesnt return _SC_IOV_MAX ..., check the output of 'man writev' for the EINVAL error and send the output to jan@kneschke.de" |
| #endif |
| |
| /* we can't send more then SSIZE_MAX bytes in one chunk */ |
| |
| /* build writev list |
| * |
| * 1. limit: num_chunks < max_chunks |
| * 2. limit: num_bytes < SSIZE_MAX |
| */ |
| for (num_chunks = 0, tc = c; tc && tc->type == MEM_CHUNK && num_chunks < max_chunks; num_chunks++, tc = tc->next); |
| |
| chunks = calloc(num_chunks, sizeof(*chunks)); |
| |
| for(tc = c, i = 0; i < num_chunks; tc = tc->next, i++) { |
| if (tc->mem->used == 0) { |
| chunks[i].iov_base = tc->mem->ptr; |
| chunks[i].iov_len = 0; |
| } else { |
| offset = tc->mem->ptr + tc->offset; |
| toSend = tc->mem->used - 1 - tc->offset; |
| |
| chunks[i].iov_base = offset; |
| |
| /* protect the return value of writev() */ |
| if (toSend > SSIZE_MAX || |
| num_bytes + toSend > SSIZE_MAX) { |
| chunks[i].iov_len = SSIZE_MAX - num_bytes; |
| |
| num_chunks = i + 1; |
| break; |
| } else { |
| chunks[i].iov_len = toSend; |
| } |
| |
| num_bytes += toSend; |
| } |
| } |
| |
| if ((r = writev(fd, chunks, num_chunks)) < 0) { |
| switch (errno) { |
| case EAGAIN: |
| case EINTR: |
| r = 0; |
| break; |
| case EPIPE: |
| case ECONNRESET: |
| free(chunks); |
| return -2; |
| default: |
| log_error_write(srv, __FILE__, __LINE__, "ssd", |
| "writev failed:", strerror(errno), fd); |
| |
| free(chunks); |
| return -1; |
| } |
| } |
| |
| cq->bytes_out += r; |
| |
| /* check which chunks have been written */ |
| |
| for(i = 0, tc = c; i < num_chunks; i++, tc = tc->next) { |
| if (r >= (ssize_t)chunks[i].iov_len) { |
| /* written */ |
| r -= chunks[i].iov_len; |
| tc->offset += chunks[i].iov_len; |
| |
| if (chunk_finished) { |
| /* skip the chunks from further touches */ |
| chunks_written++; |
| c = c->next; |
| } else { |
| /* chunks_written + c = c->next is done in the for()*/ |
| chunk_finished++; |
| } |
| } else { |
| /* partially written */ |
| |
| tc->offset += r; |
| chunk_finished = 0; |
| |
| break; |
| } |
| } |
| free(chunks); |
| |
| break; |
| } |
| case FILE_CHUNK: { |
| ssize_t r; |
| off_t abs_offset; |
| off_t toSend; |
| stat_cache_entry *sce = NULL; |
| |
| #define KByte * 1024 |
| #define MByte * 1024 KByte |
| #define GByte * 1024 MByte |
| const off_t we_want_to_mmap = 512 KByte; |
| char *start = NULL; |
| |
| if (HANDLER_ERROR == stat_cache_get_entry(srv, con, c->file.name, &sce)) { |
| log_error_write(srv, __FILE__, __LINE__, "sb", |
| strerror(errno), c->file.name); |
| return -1; |
| } |
| |
| abs_offset = c->file.start + c->offset; |
| |
| if (abs_offset > sce->st.st_size) { |
| log_error_write(srv, __FILE__, __LINE__, "sb", |
| "file was shrinked:", c->file.name); |
| |
| return -1; |
| } |
| |
| /* mmap the buffer |
| * - first mmap |
| * - new mmap as the we are at the end of the last one */ |
| if (c->file.mmap.start == MAP_FAILED || |
| abs_offset == (off_t)(c->file.mmap.offset + c->file.mmap.length)) { |
| |
| /* Optimizations for the future: |
| * |
| * adaptive mem-mapping |
| * the problem: |
| * we mmap() the whole file. If someone has alot large files and 32bit |
| * machine the virtual address area will be unrun and we will have a failing |
| * mmap() call. |
| * solution: |
| * only mmap 16M in one chunk and move the window as soon as we have finished |
| * the first 8M |
| * |
| * read-ahead buffering |
| * the problem: |
| * sending out several large files in parallel trashes the read-ahead of the |
| * kernel leading to long wait-for-seek times. |
| * solutions: (increasing complexity) |
| * 1. use madvise |
| * 2. use a internal read-ahead buffer in the chunk-structure |
| * 3. use non-blocking IO for file-transfers |
| * */ |
| |
| /* all mmap()ed areas are 512kb expect the last which might be smaller */ |
| off_t we_want_to_send; |
| size_t to_mmap; |
| |
| /* this is a remap, move the mmap-offset */ |
| if (c->file.mmap.start != MAP_FAILED) { |
| munmap(c->file.mmap.start, c->file.mmap.length); |
| c->file.mmap.offset += we_want_to_mmap; |
| } else { |
| /* in case the range-offset is after the first mmap()ed area we skip the area */ |
| c->file.mmap.offset = 0; |
| |
| while (c->file.mmap.offset + we_want_to_mmap < c->file.start) { |
| c->file.mmap.offset += we_want_to_mmap; |
| } |
| } |
| |
| /* length is rel, c->offset too, assume there is no limit at the mmap-boundaries */ |
| we_want_to_send = c->file.length - c->offset; |
| to_mmap = (c->file.start + c->file.length) - c->file.mmap.offset; |
| |
| /* we have more to send than we can mmap() at once */ |
| if (abs_offset + we_want_to_send > c->file.mmap.offset + we_want_to_mmap) { |
| we_want_to_send = (c->file.mmap.offset + we_want_to_mmap) - abs_offset; |
| to_mmap = we_want_to_mmap; |
| } |
| |
| if (-1 == c->file.fd) { /* open the file if not already open */ |
| if (-1 == (c->file.fd = open(c->file.name->ptr, O_RDONLY))) { |
| log_error_write(srv, __FILE__, __LINE__, "sbs", "open failed for:", c->file.name, strerror(errno)); |
| |
| return -1; |
| } |
| #ifdef FD_CLOEXEC |
| fcntl(c->file.fd, F_SETFD, FD_CLOEXEC); |
| #endif |
| } |
| |
| if (MAP_FAILED == (c->file.mmap.start = mmap(0, to_mmap, PROT_READ, MAP_SHARED, c->file.fd, c->file.mmap.offset))) { |
| /* close it here, otherwise we'd have to set FD_CLOEXEC */ |
| |
| log_error_write(srv, __FILE__, __LINE__, "ssbd", "mmap failed:", |
| strerror(errno), c->file.name, c->file.fd); |
| |
| return -1; |
| } |
| |
| c->file.mmap.length = to_mmap; |
| #ifdef LOCAL_BUFFERING |
| buffer_copy_string_len(c->mem, c->file.mmap.start, c->file.mmap.length); |
| #else |
| #ifdef HAVE_MADVISE |
| /* don't advise files < 64Kb */ |
| if (c->file.mmap.length > (64 KByte)) { |
| /* darwin 7 is returning EINVAL all the time and I don't know how to |
| * detect this at runtime.i |
| * |
| * ignore the return value for now */ |
| madvise(c->file.mmap.start, c->file.mmap.length, MADV_WILLNEED); |
| } |
| #endif |
| #endif |
| |
| /* chunk_reset() or chunk_free() will cleanup for us */ |
| } |
| |
| /* to_send = abs_mmap_end - abs_offset */ |
| toSend = (c->file.mmap.offset + c->file.mmap.length) - (abs_offset); |
| |
| if (toSend < 0) { |
| log_error_write(srv, __FILE__, __LINE__, "soooo", |
| "toSend is negative:", |
| toSend, |
| c->file.mmap.length, |
| abs_offset, |
| c->file.mmap.offset); |
| assert(toSend < 0); |
| } |
| |
| #ifdef LOCAL_BUFFERING |
| start = c->mem->ptr; |
| #else |
| start = c->file.mmap.start; |
| #endif |
| |
| if ((r = write(fd, start + (abs_offset - c->file.mmap.offset), toSend)) < 0) { |
| switch (errno) { |
| case EAGAIN: |
| case EINTR: |
| r = 0; |
| break; |
| case EPIPE: |
| case ECONNRESET: |
| return -2; |
| default: |
| log_error_write(srv, __FILE__, __LINE__, "ssd", |
| "write failed:", strerror(errno), fd); |
| |
| return -1; |
| } |
| } |
| |
| c->offset += r; |
| cq->bytes_out += r; |
| |
| if (c->offset == c->file.length) { |
| chunk_finished = 1; |
| |
| /* we don't need the mmaping anymore */ |
| if (c->file.mmap.start != MAP_FAILED) { |
| munmap(c->file.mmap.start, c->file.mmap.length); |
| c->file.mmap.start = MAP_FAILED; |
| } |
| } |
| |
| break; |
| } |
| default: |
| |
| log_error_write(srv, __FILE__, __LINE__, "ds", c, "type not known"); |
| |
| return -1; |
| } |
| |
| if (!chunk_finished) { |
| /* not finished yet */ |
| |
| break; |
| } |
| |
| chunks_written++; |
| } |
| |
| return chunks_written; |
| } |
| |
| #endif |