blob: eaaa402e056b75aa468143dc8e38936a973a7f1b [file] [log] [blame]
/** @file
A brief file description
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <poll.h>
#include <netinet/tcp.h>
#include <sys/resource.h>
#include <limits.h>
#include <sys/mman.h>
#include <cmath>
#include <openssl/md5.h>
#include <inttypes.h>
#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include "tscore/ink_defs.h"
#include "tscore/ink_error.h"
#include "tscore/ink_memory.h"
#include "tscore/ink_assert.h"
#include "tscore/INK_MD5.h"
#include "tscore/ParseRules.h"
#include "tscore/ink_time.h"
#include "tscore/ink_args.h"
#include "tscore/I_Version.h"
#include "tscpp/util/TextView.h"
/*
FTP - Traffic Server Template
220 i5 FTP server (Version wu-2.4(3) Mon Jul 8 14:39:48 PDT 1996) ready.
USER anonymous
331 Guest login ok, send your complete e-mail address as password.
PASS traffic_server@inktomi.com
230 Guest login ok, access restrictions apply.
CWD .
250 CWD command successful.
TYPE I
200 Type set to I.
PASV
227 Entering Passive Mode (128,174,5,14,16,238)
RETR foo
LIST
150 Opening ASCII mode data connection for /bin/ls.
*/
#define MAX_URL_LEN 1024
//
// Compilation Options
//
#define SERVER_BUFSIZE 4096
#define CLIENT_BUFSIZE 2048
#define MAX_BUFSIZE (65536 + 4096)
//
// Constants
//
#define MAXFDS 65536
#define HEADER_DONE -1
#define POLL_GROUP_SIZE 800
#define MAX_RESPONSE_LENGTH 1000000
#define HEADER_SIZE 10000
#define POLL_TIMEOUT 10
#define STATE_FTP_DATA_READY 0xFAD
#define MAX_DEFERED_URLS 10000
#define DEFERED_URLS_BLOCK 2000
#define MAX_REQUEST_BODY_LENGTH MAX_RESPONSE_LENGTH
#define JTEST_DONE 0
#define JTEST_CONT 1
static AppVersionInfo appVersionInfo;
static const char *hexdigits = "0123456789ABCDEFabcdef";
static const char *dontunescapify = "#;/?+=&:@%";
static const char *dontescapify = "#;/?+=&:@~.-_%";
enum FTP_MODE {
FTP_NULL,
FTP_PORT,
FTP_PASV,
};
typedef int (*accept_fn_t)(int);
typedef int (*poll_cb)(int);
static int read_request(int sock);
static int write_request(int sock);
static int make_client(unsigned int addr, int port);
static void make_bfc_client(unsigned int addr, int port);
static int make_url_client(const char *url, const char *base_url = 0, bool seen = false, bool unthrottled = false);
static int write_ftp_response(int sock);
static void interval_report();
static void undefer_url(bool unthrottled = false);
static void done();
static int is_done();
static int open_server(unsigned short int port, accept_fn_t accept_fn);
static int accept_ftp_data(int sock);
static char **defered_urls = nullptr;
static int n_defered_urls = 0;
static int server_fd = 0;
static int server_port = 0;
static int proxy_port = 8080;
static unsigned int proxy_addr = 0;
static unsigned int local_addr = 0;
static char proxy_host[81] = "localhost";
static char local_host[255 + 1];
static int verbose = 0;
static int verbose_errors = 1;
static int debug = 0;
static int nclients = 100;
static int current_clients = 0;
static int client_speed = 0;
static int check_content = 0;
static int nocheck_length = 0;
static int obey_redirects = 1;
static int only_clients = 0;
static int only_server = 0;
static int drop_after_CL = 0;
static int server_speed = 0;
static int server_delay = 0;
static int interval = 1;
static int sbuffersize = SERVER_BUFSIZE;
static int cbuffersize = CLIENT_BUFSIZE;
static int test_time = 0;
static int last_fd = -1;
static char *response_buffer = nullptr;
static int errors = 0;
static int clients = 0, running_clients = 0, new_clients = 0, total_clients = 0;
static int servers = 0, running_servers = 0, new_servers = 0, total_servers = 0;
static float running_ops = 0;
static int new_ops = 0;
static float total_ops = 0;
static int running_sops = 0, new_sops = 0, total_sops = 0;
static int running_latency = 0, latency = 0;
static int lat_ops = 0, b1_ops = 0, running_b1latency = 0, b1latency = 0;
static uint64_t running_cbytes = 0, new_cbytes = 0, total_cbytes = 0;
static uint64_t running_tbytes = 0, new_tbytes = 0, total_tbytes = 0;
static int average_over = 5;
static double hitrate = 0.4;
static int hotset = 1000;
static int keepalive = 4;
static int keepalive_cons = 4;
static int follow_arg = 0;
static int follow = 0;
static int follow_same_arg = 0;
static int follow_same = 0;
static char current_host[512];
static int fullpage = 0;
static int show_before = 0;
static int show_headers = 0;
static int server_keepalive = 4;
static int urls_mode = 0;
static int pipeline = 1;
static int hostrequest = 0;
static int ftp = 0;
static double ftp_mdtm_err_rate = 0.0;
static int ftp_mdtm_rate = 0;
static time_t ftp_mdtm_last_update = 0;
static char ftp_mdtm_str[64];
static int embed_url = 1;
static double ims_rate = 0.5;
static double client_abort_rate = 0.0;
static double server_abort_rate = 0.0;
static int compd_port = 0;
static int compd_suite = 0;
static int ka_cache_head[500];
static int ka_cache_tail[500];
static int n_ka_cache = 0;
static char urls_file[256] = "";
static FILE *urls_fp = nullptr;
static char urlsdump_file[256] = "";
static FILE *urlsdump_fp = nullptr;
static int drand_seed = 0;
static int docsize = -1;
static int url_hash_entries = 1000000;
static char url_hash_filename[256] = "";
static int bandwidth_test = 0;
static int bandwidth_test_to_go = 0;
static uint64_t total_client_request_bytes = 0;
static uint64_t total_proxy_request_bytes = 0;
static uint64_t total_server_response_body_bytes = 0;
static uint64_t total_server_response_header_bytes = 0;
static uint64_t total_proxy_response_body_bytes = 0;
static uint64_t total_proxy_response_header_bytes = 0;
static ink_hrtime now = 0, start_time = 0;
static int extra_headers = 0;
static int alternates = 0;
static int abort_retry_speed = 0;
static int abort_retry_bytes = 0;
static int abort_retry_secs = 5;
static int client_rate = 0;
static double reload_rate = 0;
static int vary_user_agent = 0;
static int server_content_type = 0;
static int request_extension = 0;
static int no_cache = 0;
static double evo_rate = 0.0;
static double zipf = 0.0;
static int zipf_bucket_size = 1;
static int range_mode = 0;
static int post_support = 0;
static int post_size = 0;
static const ArgumentDescription argument_descriptions[] = {
{"proxy_port", 'p', "Proxy Port", "I", &proxy_port, "JTEST_PROXY_PORT", nullptr},
{"proxy_host", 'P', "Proxy Host", "S80", &proxy_host, "JTEST_PROXY_HOST", nullptr},
{"server_port", 's', "Server Port (0:auto select)", "I", &server_port, "JTEST_SERVER_PORT", nullptr},
{"server_host", 'S', "Server Host (null:localhost)", "S80", &local_host, "JTEST_SERVER_HOST", nullptr},
{"server_speed", 'r', "Server Bytes Per Second (0:unlimit)", "I", &server_speed, "JTEST_SERVER_SPEED", nullptr},
{"server_delay", 'w', "Server Initial Delay (msec)", "I", &server_delay, "JTEST_SERVER_INITIAL_DELAY", nullptr},
{"clients", 'c', "Clients", "I", &nclients, "JTEST_CLIENTS", nullptr},
{"client_speed", 'R', "Client Bytes Per Second (0:unlimit)", "I", &client_speed, "JTEST_CLIENT_SPEED", nullptr},
{"sbuffersize", 'b', "Server Buffer Size", "I", &sbuffersize, "JTEST_SERVER_BUFSIZE", nullptr},
{"cbuffersize", 'B', "Client Buffer Size", "I", &cbuffersize, "JTEST_CLIENT_BUFSIZE", nullptr},
{"average_over", 'a', "Seconds to Average Over", "I", &average_over, "JTEST_AVERAGE_OVER", nullptr},
{"hitrate", 'z', "Hit Rate", "D", &hitrate, "JTEST_HITRATE", nullptr},
{"hotset", 'Z', "Hotset Size", "I", &hotset, "JTEST_HOTSET", nullptr},
{"interval", 'i', "Reporting Interval (seconds)", "I", &interval, "JTEST_INTERVAL", nullptr},
{"keepalive", 'k', "Keep-Alive Length", "I", &keepalive, "JTEST_KEEPALIVE", nullptr},
{"keepalive_cons", 'K', "# Keep-Alive Connections (0:unlimit)", "I", &keepalive_cons, "JTEST_KEEPALIVE_CONNECTIONS", nullptr},
{"docsize", 'L', "Document Size (-1:varied)", "I", &docsize, "JTEST_DOCSIZE", nullptr},
{"skeepalive", 'j', "Server Keep-Alive (0:unlimit)", "I", &server_keepalive, "JTEST_SERVER_KEEPALIVE", nullptr},
{"show_urls", 'x', "Show URLs before they are accessed", "F", &show_before, "JTEST_SHOW_URLS", nullptr},
{"show_headers", 'X', "Show Headers", "F", &show_headers, "JTEST_SHOW_HEADERS", nullptr},
{"ftp", 'f', "FTP Requests", "F", &ftp, "JTEST_FTP", nullptr},
{"ftp_mdtm_err_rate", ' ', "FTP MDTM 550 Error Rate", "D", &ftp_mdtm_err_rate, "JTEST_FTP_MDTM_ERR_RATE", nullptr},
{"ftp_mdtm_rate", ' ', "FTP MDTM Update Rate (sec, 0:never)", "I", &ftp_mdtm_rate, "JTEST_FTP_MDTM_RATE", nullptr},
{"fullpage", 'l', "Full Page (Images)", "F", &fullpage, "JTEST_FULLPAGE", nullptr},
{"follow", 'F', "Follow Links", "F", &follow_arg, "JTEST_FOLLOW", nullptr},
{"same_host", 'J', "Only follow URLs on same host", "F", &follow_same_arg, "JTEST_FOLLOW_SAME", nullptr},
{"test_time", 't', "run for N seconds (0:unlimited)", "I", &test_time, "TEST_TIME", nullptr},
{"urls", 'u', "URLs from File", "S256", urls_file, "JTEST_URLS", nullptr},
{"urlsdump", 'U', "URLs to File", "S256", urlsdump_file, "JTEST_URLS_DUMP", nullptr},
{"hostrequest", 'H', "Host Request(1=yes,2=transparent)", "I", &hostrequest, "JTEST_HOST_REQUEST", nullptr},
{"check_content", 'C', "Check returned content", "F", &check_content, "JTEST_CHECK_CONTENT", nullptr},
{"nocheck_length", ' ', "Don't check returned length", "F", &nocheck_length, "JTEST_NOCHECK_LENGTH", nullptr},
{"obey_redirects", 'm', "Obey Redirects", "f", &obey_redirects, "JTEST_OBEY_REDIRECTS", nullptr},
{"embed URL", 'M', "Embed URL in synth docs", "f", &embed_url, "JTEST_EMBED_URL", nullptr},
{"url_hash_entries", 'q', "URL Hash Table Size (-1:use file size)", "I", &url_hash_entries, "JTEST_URL_HASH_ENTRIES", nullptr},
{"url_hash_filename", 'Q', "URL Hash Table Filename", "S256", url_hash_filename, "JTEST_URL_HASH_FILENAME", nullptr},
{"only_clients", 'y', "Only Clients", "F", &only_clients, "JTEST_ONLY_CLIENTS", nullptr},
{"only_server", 'Y', "Only Server", "F", &only_server, "JTEST_ONLY_SERVER", nullptr},
{"bandwidth_test", 'A', "Bandwidth Test", "I", &bandwidth_test, "JTEST_BANDWIDTH_TEST", nullptr},
{"drop_after_CL", 'T', "Drop after Content-Length", "F", &drop_after_CL, "JTEST_DROP", nullptr},
{"verbose", 'v', "Verbose Flag", "F", &verbose, "JTEST_VERBOSE", nullptr},
{"verbose_errors", 'E', "Verbose Errors Flag", "f", &verbose_errors, "JTEST_VERBOSE_ERRORS", nullptr},
{"drand", 'D', "Random Number Seed", "I", &drand_seed, "JTEST_DRAND", nullptr},
{"ims_rate", 'I', "IMS Not-Changed Rate", "D", &ims_rate, "JTEST_IMS_RATE", nullptr},
{"client_abort_rate", 'g', "Client Abort Rate", "D", &client_abort_rate, "JTEST_CLIENT_ABORT_RATE", nullptr},
{"server_abort_rate", 'G', "Server Abort Rate", "D", &server_abort_rate, "JTEST_SERVER_ABORT_RATE", nullptr},
{"extra_headers", 'n', "Number of Extra Headers", "I", &extra_headers, "JTEST_EXTRA_HEADERS", nullptr},
{"alternates", 'N', "Number of Alternates", "I", &alternates, "JTEST_ALTERNATES", nullptr},
{"client_rate", 'e', "Clients Per Sec", "I", &client_rate, "JTEST_CLIENT_RATE", nullptr},
{"abort_retry_speed", 'o', "Abort/Retry Speed", "I", &abort_retry_speed, "JTEST_ABORT_RETRY_SPEED", nullptr},
{"abort_retry_bytes", ' ', "Abort/Retry Threshold (bytes)", "I", &abort_retry_bytes, "JTEST_ABORT_RETRY_THRESHHOLD_BYTES",
nullptr},
{"abort_retry_secs", ' ', "Abort/Retry Threshhold (secs)", "I", &abort_retry_secs, "JTEST_ABORT_RETRY_THRESHHOLD_SECS", nullptr},
{"reload_rate", 'W', "Reload Rate", "D", &reload_rate, "JTEST_RELOAD_RATE", nullptr},
{"compd_port", 'O', "Compd port", "I", &compd_port, "JTEST_COMPD_PORT", nullptr},
{"compd_suite", '1', "Compd Suite", "F", &compd_suite, "JTEST_COMPD_SUITE", nullptr},
{"vary_user_agent", '2', "Vary on User-Agent (use w/ alternates)", "I", &vary_user_agent, "JTEST_VARY_ON_USER_AGENT", nullptr},
{"content_type", '3', "Server Content-Type (1 html, 2 jpeg)", "I", &server_content_type, "JTEST_CONTENT_TYPE", nullptr},
{"request_extension", '4', "Request Extn (1\".html\" 2\".jpeg\" 3\"/\")", "I", &request_extension, "JTEST_REQUEST_EXTENSION",
nullptr},
{"no_cache", '5', "Send Server no-cache", "I", &no_cache, "JTEST_NO_CACHE", nullptr},
{"zipf_bucket", '7', "Bucket size (of 1M buckets) for Zipf", "I", &zipf_bucket_size, "JTEST_ZIPF_BUCKET_SIZE", nullptr},
{"zipf", '8', "Use a Zipf distribution with this alpha (say 1.2)", "D", &zipf, "JTEST_ZIPF", nullptr},
{"evo_rate", '9', "Evolving Hotset Rate (evolutions/hour)", "D", &evo_rate, "JTEST_EVOLVING_HOTSET_RATE", nullptr},
{"debug", 'd', "Debug Flag", "F", &debug, "JTEST_DEBUG", nullptr},
{"range_mode", ' ', "Range Mode", "I", &range_mode, "JTEST_RANGE_MODE", nullptr},
{"post_support", ' ', "POST Mode (0 disable(default), 1 random, 2 specified size by post_size)", "I", &post_support,
"JTEST_POST_MODE", nullptr},
{"post_size", ' ', "POST SIZE", "I", &post_size, "JTEST_POST_SIZE", nullptr},
HELP_ARGUMENT_DESCRIPTION(),
VERSION_ARGUMENT_DESCRIPTION()};
int n_argument_descriptions = countof(argument_descriptions);
struct FD {
int fd;
poll_cb read_cb;
poll_cb write_cb;
ink_hrtime start;
ink_hrtime active;
ink_hrtime ready;
double doc;
int doc_length;
struct sockaddr_in name;
int state; // request parsing state
int req_pos; // request read position
char *base_url = nullptr;
char *req_header = nullptr;
char *response = nullptr;
char *response_header = nullptr;
int length;
int response_length;
int response_remaining;
int keepalive = 0;
int next;
int nalternate = 0;
unsigned int ip = 0;
unsigned int binary : 1;
unsigned int ims : 1;
unsigned int range : 1;
unsigned int drop_after_CL : 1;
unsigned int client_abort : 1;
unsigned int jg_compressed : 1;
int *count;
int bytes;
int ftp_data_fd = 0;
FTP_MODE ftp_mode;
unsigned int ftp_peer_addr;
unsigned short ftp_peer_port;
unsigned long range_bytes;
unsigned long range_end;
unsigned long range_start;
int post_size;
int total_length;
int post_cl;
int send_header;
int header_size;
void
reset()
{
next = 0;
fd = -1;
read_cb = nullptr;
write_cb = nullptr;
state = 0;
start = 0;
active = 0;
ready = 0;
req_pos = 0;
length = 0;
range = 0;
range_bytes = 0;
range_start = 0;
range_end = 0;
post_size = 0;
send_header = 0;
if (!urls_mode) {
response = nullptr;
}
if (response_header) {
response_header[0] = 0;
}
response_length = 0;
response_remaining = 0;
count = nullptr;
bytes = 0;
doc = 0.0;
doc_length = 0;
ims = 0;
drop_after_CL = ::drop_after_CL;
client_abort = 0;
jg_compressed = 0;
ftp_mode = FTP_NULL;
ftp_peer_addr = 0;
ftp_peer_port = 0;
total_length = 0;
post_cl = 0;
header_size = 0;
}
void close();
FD() : binary(0)
{
ink_zero(name);
reset();
}
};
FD *fd = nullptr;
void
FD::close()
{
if (verbose) {
printf("close: %d\n", fd);
}
::close(fd);
if (is_done()) {
done();
}
keepalive = 0;
ip = 0;
if (count) {
(*count)--;
}
if (count == &clients) {
current_clients--;
}
reset();
if (urls_mode) {
undefer_url();
}
ftp_data_fd = 0;
}
#define MAX_FILE_ARGUMENTS 100
typedef struct {
char sche[MAX_URL_LEN + 1];
char host[MAX_URL_LEN + 1];
char port[MAX_URL_LEN + 1];
char path[MAX_URL_LEN + 1];
char frag[MAX_URL_LEN + 1];
char quer[MAX_URL_LEN + 1];
char para[MAX_URL_LEN + 1];
int sche_exists;
int host_exists;
int port_exists;
int path_exists;
int frag_exists;
int quer_exists;
int para_exists;
int rel_url;
int leading_slash;
int is_path_name;
} InkWebURLComponents;
static int ink_web_remove_dots(char *src, char *dest, int *leadingslash, int max_dest_len);
static int ink_web_unescapify_string(char *dest_in, char *src_in, int max_dest_len);
static int ink_web_escapify_string(char *dest_in, char *src_in, int max_dest_len);
static void ink_web_decompose_url(const char *src_url, char *sche, char *host, char *port, char *path, char *frag, char *quer,
char *para, int *real_sche_exists, int *real_host_exists, int *real_port_exists,
int *real_path_exists, int *real_frag_exists, int *real_quer_exists, int *real_para_exists,
int *real_relative_url, int *real_leading_slash);
static void ink_web_canonicalize_url(const char *base_url, const char *emb_url, char *dest_url, int max_dest_url_len);
static void ink_web_decompose_url_into_structure(const char *url, InkWebURLComponents *c);
static void
remove_last_seg(char *src, char *dest)
{
char *ptr;
for (ptr = src + strlen(src) - 1; ptr >= src; ptr--) {
if (*ptr == '/') {
break;
}
}
while (src <= ptr) {
*dest++ = *src++;
}
*dest = '\0';
}
static inline void
remove_multiple_slash(char *src, char *dest)
{
char *ptr = nullptr;
for (ptr = src; *ptr;) {
*(dest++) = *ptr;
if (*ptr == '/') {
while ((*ptr == '/') && *ptr) {
ptr++;
}
} else {
ptr++;
}
}
*dest = '\0';
}
static inline void
append_string(char *dest, const char *src, int *offset_ptr, int max_len)
{
int num = strlen(src);
if (*offset_ptr + num >= max_len) {
num = max_len - (*offset_ptr + 1);
if (num <= 1) {
return;
}
}
memcpy(dest + *offset_ptr, src, num);
dest[*offset_ptr + num] = '\0';
(*offset_ptr) += num;
}
// End Library functions
static void
panic(const char *s)
{
fputs(s, stderr);
exit(1);
}
static void
panic_perror(const char *s)
{
perror(s);
exit(1);
}
static int
max_limit_fd()
{
struct rlimit rl;
if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) {
#ifdef OPEN_MAX
// Darwin
rl.rlim_cur = std::min(static_cast<rlim_t>(OPEN_MAX), rl.rlim_max);
#else
rl.rlim_cur = rl.rlim_max;
#endif
if (setrlimit(RLIMIT_NOFILE, &rl) >= 0) {
if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) {
return rl.rlim_cur;
}
}
}
panic_perror("couldn't set RLIMIT_NOFILE\n");
return -1;
}
static int
read_ready(int fd_in)
{
struct pollfd p;
p.events = POLLIN;
p.fd = fd_in;
int r = poll(&p, 1, 0);
if (r <= 0) {
return r;
}
if (p.revents & (POLLERR | POLLNVAL)) {
return -1;
}
if (p.revents & (POLLIN | POLLHUP)) {
return 1;
}
return 0;
}
static void
poll_init(int sock)
{
if (!fd[sock].req_header) {
fd[sock].req_header = (char *)malloc(HEADER_SIZE * pipeline + MAX_REQUEST_BODY_LENGTH);
}
if (!fd[sock].response_header) {
fd[sock].response_header = (char *)malloc(HEADER_SIZE);
}
if (!fd[sock].base_url) {
fd[sock].base_url = (char *)malloc(HEADER_SIZE);
}
fd[sock].reset();
}
static void
poll_set(int sock, poll_cb read_cb, poll_cb write_cb = nullptr)
{
if (verbose) {
printf("adding poll %d %s %s\n", sock, read_cb ? "READ" : "-", write_cb ? "WRITE" : "-");
}
fd[sock].fd = sock;
fd[sock].read_cb = read_cb;
fd[sock].write_cb = write_cb;
if (last_fd < sock) {
last_fd = sock;
}
}
static void
poll_init_set(int sock, poll_cb read_cb, poll_cb write_cb = nullptr)
{
poll_init(sock);
poll_set(sock, read_cb, write_cb);
}
static int
fast(int sock, int speed, int d)
{
if (!speed) {
return 0;
}
int64_t t = now - fd[sock].start + 1;
int target = (int)(((t / HRTIME_MSECOND) * speed) / 1000);
int delta = d - target;
if (delta > 0) {
int mwait = (delta * 1000) / speed;
fd[sock].ready = now + (mwait * HRTIME_MSECOND);
return 1;
} else {
fd[sock].ready = now;
}
return 0;
}
// Return the number of milliseconds elapsed since the start of the request.
static ink_hrtime
elapsed_from_start(int sock)
{
ink_hrtime timenow = ink_get_hrtime_internal();
return ink_hrtime_diff_msec(timenow, fd[sock].start);
}
static int
faster_than(int sock, int speed, int d)
{
if (!speed) {
return 1;
}
int64_t t = now - fd[sock].start + 1;
int target = (int)(((t / HRTIME_MSECOND) * speed) / 1000);
int delta = d - target;
if (delta > 0) {
return 1;
}
return 0;
}
static void
get_path_from_req(char *buf, char **purl_start, char **purl_end)
{
char *url_start = buf;
char *url_end = nullptr;
if (!strncasecmp(url_start, "GET ", sizeof("GET ") - 1)) {
url_start += sizeof("GET ") - 1;
url_end = (char *)memchr(url_start, ' ', 70);
} else if (!strncasecmp(url_start, "POST ", sizeof("POST ") - 1)) {
url_start += sizeof("POST ") - 1;
url_end = (char *)memchr(url_start, ' ', 70);
} else {
url_end = (char *)memchr(url_start, 0, 70);
}
if (!url_end) {
panic("malformed request\n");
}
if (url_end - url_start > 10) {
if (!strncasecmp(url_start, "http://", sizeof("http://") - 1)) {
url_start += sizeof("http://") - 1;
url_start = (char *)memchr(url_start, '/', 70);
}
}
*purl_start = url_start;
*purl_end = url_end;
}
static int
make_response_header(int sock, char *url_start, char *url_end, int *url_len, char *header, int header_limit)
{
const char *content_type;
switch (server_content_type) {
case 1:
content_type = "text/html";
break;
case 2:
content_type = "image/jpeg";
break;
default:
content_type = ((compd_suite || alternates) ? "image/jpeg" : "text/html");
if (only_server && strstr(fd[sock].req_header, "Cookie:")) {
content_type = "image/jpeg";
}
}
if (!ftp && embed_url && fd[sock].response_length > 16) {
get_path_from_req(fd[sock].req_header, &url_start, &url_end);
*url_end = 0;
*url_len = url_end - url_start;
}
int print_len = 0;
if (!ftp) {
if (fd[sock].range) {
char buff[1024];
memset(buff, 0, 1024);
if (fd[sock].range_end > fd[sock].range_start) {
snprintf(buff, 1024, "Content-Range: bytes %lu-%lu/%d", fd[sock].range_start, fd[sock].range_end, fd[sock].total_length);
} else {
snprintf(buff, 1024, "Content-Range: bytes %lu-%d/%d", fd[sock].range_start, fd[sock].total_length, fd[sock].total_length);
}
print_len = snprintf(header, header_limit,
"HTTP/1.1 206 Partial-Content\r\n"
"Content-Type: %s\r\n"
"Cache-Control: max-age=630720000\r\n"
"Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n"
"%s"
"Content-Length: %d\r\n"
"%s\r\n"
"%s"
"\r\n%s",
content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : "Connection: close\r\n",
fd[sock].response_length, buff, no_cache ? "Pragma: no-cache\r\nCache-Control: no-cache\r\n" : "",
url_start ? url_start : "");
} else if (fd[sock].ims) {
print_len = snprintf(header, header_limit,
"HTTP/1.0 304 Not-Modified\r\n"
"Content-Type: %s\r\n"
"Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n"
"%s"
"\r\n",
content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : "");
*url_len = 0;
} else {
print_len = snprintf(header, header_limit,
"HTTP/1.0 200 OK\r\n"
"Content-Type: %s\r\n"
"Cache-Control: max-age=630720000\r\n"
"Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n"
"%s"
"Content-Length: %d\r\n"
"%s"
"\r\n%s",
content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : "", fd[sock].response_length,
no_cache ? "Pragma: no-cache\r\nCache-Control: no-cache\r\n" : "", url_start ? url_start : "");
}
} else {
*url_len = print_len = sprintf(header, "ftp://%s:%d/%12.10f/%d", local_host, server_port, fd[sock].doc, fd[sock].length);
}
if (show_headers) {
printf("Response to Proxy: {\n%s}\n", header);
}
return print_len;
}
static int
send_response(int sock)
{
char *url_start = nullptr;
char *url_end = nullptr;
int err = 0, towrite;
int url_len = 0;
if (fd[sock].req_pos >= 0) {
char header[1024];
int print_len = make_response_header(sock, url_start, url_end, &url_len, header, 1024);
int len = print_len - fd[sock].req_pos;
ink_assert(len > 0);
do {
err = write(sock, header + fd[sock].req_pos, len);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
return -1;
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
return -1;
}
if (verbose) {
printf("wrote %d %d\n", sock, err);
}
new_tbytes += err;
fd[sock].req_pos += err;
fd[sock].bytes += err;
if (fd[sock].req_pos >= len) {
fd[sock].req_pos = -1;
} else {
return 0;
}
fd[sock].response += url_len;
fd[sock].length -= url_len;
if (fd[sock].range) {
fd[sock].range_bytes -= url_len;
}
total_server_response_header_bytes += print_len - url_len;
total_server_response_body_bytes += url_len;
}
/* then the response */
towrite = server_speed ? server_speed : MAX_RESPONSE_LENGTH;
if (!fd[sock].range) {
if (fd[sock].length < towrite) {
towrite = fd[sock].length;
}
} else {
if (fd[sock].range_bytes < (unsigned long)towrite) {
towrite = fd[sock].range_bytes;
}
}
if (towrite > 0) {
if (fast(sock, server_speed, fd[sock].bytes)) {
return 0;
}
do {
err = write(sock, fd[sock].response, towrite);
} while ((err == -1) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
fprintf(stderr, "write errno %d length %d sock %d\n", errno, towrite, sock);
errors++;
return -1;
}
if (verbose) {
printf("wrote %d %d\n", sock, err);
}
if (fd[sock].range) {
ink_assert(err <= (int)(fd[sock].range_end - fd[sock].range_start + 1));
}
new_tbytes += err;
total_server_response_body_bytes += err;
fd[sock].response += err;
fd[sock].length -= err;
fd[sock].bytes += err;
}
if (fast(sock, server_speed, fd[sock].bytes)) {
return 0;
}
if (fd[sock].length <= 0 || !err) {
if (fd[sock].response) {
new_sops++;
}
if (verbose) {
printf("write %d done\n", sock);
}
if (fd[sock].keepalive > 0 && !ftp) {
poll_init_set(sock, read_request);
fd[sock].start = now;
fd[sock].ready = now + server_delay * HRTIME_MSECOND;
return 0;
}
return 1;
}
return 0;
}
static char *
strncasestr(char *s, const char *find, int len)
{
int findlen = strlen(find);
char *e = s + len;
while (1) {
char *x = (char *)memchr(s, *find, e - s);
if (!x) {
if (ParseRules::is_upalpha(*find)) {
x = (char *)memchr(s, ParseRules::ink_tolower(*find), e - s);
} else {
x = (char *)memchr(s, ParseRules::ink_toupper(*find), e - s);
}
if (!x) {
break;
}
}
if (!strncasecmp(find, x, findlen)) {
return x;
}
s = x + 1;
}
return nullptr;
}
static char *
check_keepalive(char *r, int length)
{
char *ka = strncasestr(r, "Connection:", length);
char *http_1_1 = strncasestr(r, "HTTP/1.1", length);
if (http_1_1 && !ka) {
return http_1_1;
}
if (ka) {
int l = length - (ka - r);
char *e = (char *)memchr(ka, '\n', l);
if (!e) {
e = (char *)memchr(ka, '\r', l);
}
if (strncasestr(ka, "close", e - ka)) {
return nullptr;
}
}
return ka;
}
static int
check_alt(char *r, int length)
{
char *s = strncasestr(r, "Cookie:", length);
if (!s) {
s = strncasestr(r, "User-Agent:", length);
if (s) {
s += sizeof("User-Agent:");
}
} else {
s += sizeof("Cookie:");
}
if (s) {
int l = length - (s - r);
char *e = (char *)memchr(s, '\n', l);
if (!e) {
e = (char *)memchr(s, '\r', l);
}
if (!(s = strncasestr(s, "jtest", e - s))) {
return 0;
}
s = (char *)memchr(s, '-', l);
if (!s) {
return 0;
}
s = (char *)memchr(s + 1, '-', l);
if (!s) {
return 0;
}
return ink_atoi(s + 1);
}
return 0;
}
static void
make_response(int sock, int code)
{
fd[sock].response = fd[sock].req_header;
fd[sock].length = sprintf(fd[sock].req_header, "%d\r\n", code);
fd[sock].req_pos = 0;
fd[sock].response_length = strlen(fd[sock].req_header);
poll_set(sock, nullptr, write_ftp_response);
}
static void
make_long_response(int sock)
{
fd[sock].response = fd[sock].req_header;
fd[sock].req_pos = 0;
fd[sock].response_length = strlen(fd[sock].req_header);
poll_set(sock, nullptr, write_ftp_response);
}
static int
send_ftp_data_when_ready(int sock)
{
if (fd[sock].state == STATE_FTP_DATA_READY && fd[sock].doc_length) {
fd[sock].response = fd[sock].req_header;
fd[sock].response_length = fd[sock].length = fd[sock].doc_length;
if (verbose) {
printf("ftp data %d >-< %d\n", sock, fd[sock].ftp_data_fd);
}
fd[sock].response = response_buffer + fd[sock].doc_length % 256;
fd[sock].req_pos = 0;
poll_set(sock, nullptr, send_response);
}
return 0;
}
static int
send_ftp_data(int sock, char *start /*, char * end */)
{
int data_fd = fd[sock].ftp_data_fd;
if (sscanf(start, "%d", &fd[data_fd].doc_length) != 1) {
return -1;
}
fd[data_fd].doc = fd[sock].doc;
send_ftp_data_when_ready(data_fd);
return 0;
}
static int
process_header(int sock, char *buffer, int offset)
{
char host[80];
int port, length;
float r;
int post_request = 0;
if (sscanf(buffer, "GET http://%[^:]:%d/%f/%d", host, &port, &r, &length) == 4) {
} else if (sscanf(buffer, "GET /%f/%d", &r, &length) == 2) {
} else if (sscanf(buffer, "POST http://%[^:]:%d/%f/%d", host, &port, &r, &length) == 4) {
post_request = 1;
} else if (sscanf(buffer, "POST /%f/%d", &r, &length) == 2) {
post_request = 1;
} else {
if (verbose) {
printf("misscan: %s\n", buffer);
}
fd[sock].close();
return -1;
}
if (verbose) {
printf("read_request %d got request %d\n", sock, length);
}
char *ims = strncasestr(buffer, "If-Modified-Since:", offset);
char *range = strncasestr(buffer, "Range:", offset);
char *post_cl = nullptr;
if (post_support) {
post_cl = strncasestr(buffer, "Content-Length:", offset);
fd[sock].post_cl = atoi(post_cl + strlen("Content-Length: "));
ink_assert(post_cl && post_request && fd[sock].post_cl);
}
// coverity[dont_call]
if (drand48() > ims_rate) {
ims = nullptr;
}
if (range) {
fd[sock].range = 1;
if (sscanf(range, "Range: bytes=%lu-%lu", &fd[sock].range_start, &fd[sock].range_end) == 2) {
fd[sock].range_bytes = fd[sock].range_end - fd[sock].range_start + 1;
} else if (sscanf(range, "Range: bytes=%lu-", &fd[sock].range_start) == 1) {
fd[sock].range_bytes = length - fd[sock].range_start + 1;
} else {
if (verbose)
printf("unvalid 206");
}
ims = nullptr;
if (verbose) {
printf("sending Range: 206 Partial %lu-%lu\n", fd[sock].range_start, fd[sock].range_end);
}
}
fd[sock].ims = ims ? 1 : 0;
if (!ims) {
if (range) {
fd[sock].total_length = length;
fd[sock].response_length = fd[sock].length = fd[sock].range_bytes;
} else {
fd[sock].response_length = fd[sock].length = length;
}
fd[sock].nalternate = check_alt(fd[sock].req_header, strlen(fd[sock].req_header));
fd[sock].response = response_buffer + length % 256 + fd[sock].nalternate;
} else {
fd[sock].nalternate = 0;
if (verbose) {
printf("sending IMS 304: Not-Modified\n");
}
fd[sock].response = nullptr;
fd[sock].response_length = fd[sock].length = 0;
}
fd[sock].header_size = offset;
return post_request;
}
static int
parse_header(int sock, int err)
{
int i;
int post_request = 0;
if (verbose) {
printf("read %d got %d\n", sock, err);
}
total_proxy_request_bytes += err;
new_tbytes += err;
fd[sock].req_pos += err;
fd[sock].req_header[fd[sock].req_pos] = 0;
char *buffer = fd[sock].req_header;
for (i = fd[sock].req_pos - err; i < fd[sock].req_pos; i++) {
switch (fd[sock].state) {
case 0:
if (buffer[i] == '\r') {
fd[sock].state = 1;
} else if (buffer[i] == '\n') {
fd[sock].state = 2;
}
break;
case 1:
if (buffer[i] == '\n') {
fd[sock].state = 2;
} else {
fd[sock].state = 0;
}
break;
case 2:
if (buffer[i] == '\r') {
fd[sock].state = 3;
} else if (buffer[i] == '\n') {
fd[sock].state = 3;
goto L3;
} else {
fd[sock].state = 0;
}
break;
L3:
case 3:
if (buffer[i] == '\n') {
if (show_headers) {
printf("Request from Proxy: {\n%s}\n", buffer);
}
post_request = process_header(sock, buffer, i);
if (post_request < 0) {
return JTEST_DONE;
}
if (post_request) {
fd[sock].state = 4;
break;
}
fd[sock].req_pos = 0;
if (!check_keepalive(fd[sock].req_header, strlen(fd[sock].req_header))) {
fd[sock].keepalive = 0;
} else {
fd[sock].keepalive--;
}
// coverity[dont_call]
if (fd[sock].length && drand48() < server_abort_rate) {
// coverity[dont_call]
fd[sock].length = (int)(drand48() * (fd[sock].length - 1));
fd[sock].keepalive = 0;
}
poll_set(sock, nullptr, send_response);
return JTEST_DONE;
} else {
fd[sock].state = 0;
}
break;
case 4:
if (fd[sock].req_pos - fd[sock].header_size - 1 >= fd[sock].post_cl) {
fd[sock].req_pos = 0;
if (!check_keepalive(fd[sock].req_header, strlen(fd[sock].req_header))) {
fd[sock].keepalive = 0;
} else {
fd[sock].keepalive--;
}
// coverity[dont_call]
if (fd[sock].length && drand48() < server_abort_rate) {
// coverity[dont_call]
fd[sock].length = (int)(drand48() * (fd[sock].length - 1));
fd[sock].keepalive = 0;
}
poll_set(sock, nullptr, send_response);
fd[sock].state = 0;
return JTEST_DONE;
}
return JTEST_CONT;
}
}
return JTEST_CONT;
}
static int
read_request(int sock)
{
if (verbose) {
printf("read_request %d\n", sock);
}
int err = 0;
int maxleft = 0;
if (!post_support)
maxleft = HEADER_SIZE - fd[sock].req_pos - 1;
else
maxleft = HEADER_SIZE + MAX_REQUEST_BODY_LENGTH - fd[sock].req_pos - 1;
while (true) {
do {
err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft);
} while ((err < 0) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
if (fd[sock].req_pos || errno != ECONNRESET) {
perror("read");
}
return -1;
} else if (err == 0) {
if (verbose) {
printf("eof\n");
}
return -1;
} else {
if (verbose) {
printf("read %d got %d\n", sock, err);
}
if (parse_header(sock, err) == JTEST_DONE)
return 0;
}
}
return 0;
}
static int
send_compd_response(int sock)
{
int err = 0;
struct {
unsigned int code;
unsigned int len;
} compd_header;
if (fd[sock].req_pos < (int)sizeof(compd_header)) {
compd_header.code = 0;
compd_header.len = htonl((fd[sock].length * 2) / 3);
do {
err = write(sock, (char *)&compd_header + fd[sock].req_pos, sizeof(compd_header) - fd[sock].req_pos);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
if (verbose_errors) {
printf("write %d closed early\n", sock);
}
goto Lerror;
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("write");
goto Lerror;
}
if (verbose) {
printf("write %d %d\n", sock, err);
}
new_tbytes += err;
fd[sock].req_pos += err;
fd[sock].bytes += err;
fd[sock].response = response_buffer + (((fd[sock].length * 2) / 3) % 256);
}
if (fd[sock].req_pos < ((fd[sock].length * 2) / 3) + (int)sizeof(compd_header)) {
int towrite = cbuffersize;
int desired = ((fd[sock].length * 2) / 3) + sizeof(compd_header) - fd[sock].req_pos;
if (towrite > desired) {
towrite = desired;
}
if (fast(sock, client_speed, fd[sock].bytes)) {
return 0;
}
do {
err = write(sock, fd[sock].response + fd[sock].req_pos - sizeof(compd_header), towrite);
} while ((err == -1) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
fprintf(stderr, "write errno %d length %d sock %d\n", errno, towrite, sock);
errors++;
return -1;
}
if (verbose) {
printf("wrote %d %d\n", sock, err);
}
new_tbytes += err;
total_server_response_body_bytes += err;
fd[sock].req_pos += err;
fd[sock].bytes += err;
}
if (fd[sock].req_pos >= ((fd[sock].length * 2) / 3) + 4) {
return -1;
}
return 0;
Lerror:
errors++;
return 1;
}
static int
read_compd_request(int sock)
{
if (verbose) {
printf("read_compd_request %d\n", sock);
}
int err = 0;
if (fd[sock].req_pos < 4) {
int maxleft = HEADER_SIZE - fd[sock].req_pos - 1;
do {
err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft);
} while ((err < 0) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("read");
return -1;
} else if (err == 0) {
if (verbose) {
printf("eof\n");
}
return -1;
} else {
if (verbose) {
printf("read %d got %d\n", sock, err);
}
total_proxy_request_bytes += err;
new_tbytes += err;
fd[sock].req_pos += err;
if (fd[sock].req_pos < 4) {
return 0;
}
fd[sock].length = ntohl(*(unsigned int *)fd[sock].req_header);
}
}
if (fd[sock].req_pos >= fd[sock].length + 4) {
goto Lcont;
}
{
char buf[MAX_BUFSIZE];
int toread = cbuffersize;
if (fast(sock, client_speed, fd[sock].bytes)) {
return 0;
}
do {
err = read(sock, buf, toread);
} while ((err == -1) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
if (errno == ECONNRESET) {
if (verbose || verbose_errors) {
perror("read");
}
errors++;
return -1;
}
panic_perror("read");
}
if (!err) {
if (verbose || verbose_errors) {
perror("read");
}
errors++;
return -1;
}
total_proxy_request_bytes += err;
new_tbytes += err;
fd[sock].req_pos += err;
}
if (fd[sock].req_pos >= fd[sock].length + 4) {
goto Lcont;
}
return 0;
Lcont:
fd[sock].req_pos = 0;
fd[sock].keepalive = 0;
poll_set(sock, nullptr, send_compd_response);
return 0;
}
static int
read_ftp_request(int sock)
{
if (verbose) {
printf("read_ftp_request %d\n", sock);
}
int err = 0;
int i;
int maxleft = HEADER_SIZE - fd[sock].req_pos - 1;
do {
err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft);
} while ((err < 0) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("read");
return -1;
} else if (err == 0) {
if (verbose) {
printf("eof\n");
}
return -1;
} else {
if (verbose) {
printf("read %d got %d\n", sock, err);
}
new_tbytes += err;
fd[sock].req_pos += err;
fd[sock].req_header[fd[sock].req_pos] = 0;
char *buffer = fd[sock].req_header, *n;
int res = 0;
buffer[fd[sock].req_pos] = 0;
if (verbose) {
printf("buffer [%s]\n", buffer);
}
#define STREQ(_x, _s) (!strncasecmp(_x, _s, sizeof(_s) - 1))
if (STREQ(buffer, "USER")) {
res = 331;
goto Lhere;
} else if (STREQ(buffer, "PASS")) {
res = 230;
goto Lhere;
} else if (STREQ(buffer, "CWD")) {
// TS used to send "CWD 1.2110000000..."
// TS now sends "CWD /1.2110000000^M\n", so skip 5 instead of 4
fd[sock].doc = (buffer[4] == '/') ? atof(buffer + 5) : atof(buffer + 4);
res = 250;
goto Lhere;
} else if (STREQ(buffer, "TYPE")) {
res = 200;
Lhere:
n = (char *)memchr(buffer, '\n', fd[sock].req_pos);
if (!n) {
return 0;
}
make_response(sock, res);
return 0;
} else if (STREQ(buffer, "SIZE")) {
fd[sock].length = sprintf(fd[sock].req_header, "213 %d\r\n", atoi(buffer + 5));
make_long_response(sock);
return 0;
} else if (STREQ(buffer, "MDTM")) {
double err_rand = 1.0;
if (ftp_mdtm_err_rate != 0.0) {
// coverity[dont_call]
err_rand = drand48();
}
if (err_rand < ftp_mdtm_err_rate) {
fd[sock].length = sprintf(fd[sock].req_header, "550 mdtm file not found\r\n");
} else {
if (ftp_mdtm_rate == 0) {
fd[sock].length = sprintf(fd[sock].req_header, "213 19900615100045\r\n");
} else {
time_t mdtm_now;
time(&mdtm_now);
if (mdtm_now - ftp_mdtm_last_update > ftp_mdtm_rate) {
struct tm *mdtm_tm;
ftp_mdtm_last_update = mdtm_now;
mdtm_tm = localtime(&ftp_mdtm_last_update);
sprintf(ftp_mdtm_str, "213 %.4d%.2d%.2d%.2d%.2d%.2d", mdtm_tm->tm_year + 1900, mdtm_tm->tm_mon + 1, mdtm_tm->tm_mday,
mdtm_tm->tm_hour, mdtm_tm->tm_min, mdtm_tm->tm_sec);
}
fd[sock].length = sprintf(fd[sock].req_header, "%s\r\n", ftp_mdtm_str);
}
}
make_long_response(sock);
return 0;
} else if (STREQ(buffer, "PASV")) {
n = (char *)memchr(buffer, '\n', fd[sock].req_pos);
if (!n) {
return 0;
}
if ((fd[sock].ftp_data_fd = open_server(0, accept_ftp_data)) < 0) {
panic("could not open ftp data PASV accept port\n");
}
fd[fd[sock].ftp_data_fd].ftp_data_fd = sock;
if (verbose) {
printf("ftp PASV %d <-> %d\n", sock, fd[sock].ftp_data_fd);
}
unsigned short p = fd[fd[sock].ftp_data_fd].name.sin_port;
fd[sock].length = sprintf(fd[sock].req_header, "227 (%u,%u,%u,%u,%u,%u)\r\n", ((unsigned char *)&local_addr)[0],
((unsigned char *)&local_addr)[1], ((unsigned char *)&local_addr)[2],
((unsigned char *)&local_addr)[3], ((unsigned char *)&p)[0], ((unsigned char *)&p)[1]);
if (verbose) {
puts(fd[sock].req_header);
}
make_long_response(sock);
fd[sock].ftp_mode = FTP_PASV;
return 0;
} else if (STREQ(buffer, "PORT")) {
// watch out for an endian problems !!!
char *start, *stop;
for (start = buffer; !ParseRules::is_digit(*start); start++) {
;
}
for (stop = start; *stop != ','; stop++) {
;
}
for (i = 0; i < 4; i++) {
((unsigned char *)&(fd[sock].ftp_peer_addr))[i] = strtol(start, &stop, 10);
for (start = ++stop; *stop != ','; stop++) {
;
}
}
((unsigned char *)&(fd[sock].ftp_peer_port))[0] = strtol(start, &stop, 10);
start = ++stop;
((unsigned char *)&(fd[sock].ftp_peer_port))[1] = strtol(start, nullptr, 10);
fd[sock].length = sprintf(fd[sock].req_header, "200 Okay\r\n");
if (verbose) {
puts(fd[sock].req_header);
}
make_long_response(sock);
fd[sock].ftp_mode = FTP_PORT;
return 0;
} else if (STREQ(buffer, "RETR")) {
if (fd[sock].ftp_mode == FTP_NULL) {
// default to PORT ftp
struct sockaddr_in ftp_peer;
int ftp_peer_addr_len = sizeof(ftp_peer);
if (getpeername(sock, (struct sockaddr *)&ftp_peer,
#if 0
&ftp_peer_addr_len
#else
(socklen_t *)&ftp_peer_addr_len
#endif
) < 0) {
perror("getsockname");
exit(EXIT_FAILURE);
}
fd[sock].ftp_peer_addr = ftp_peer.sin_addr.s_addr;
fd[sock].ftp_peer_port = ftp_peer.sin_port;
fd[sock].ftp_mode = FTP_PORT;
}
if (fd[sock].ftp_mode == FTP_PORT) {
if ((fd[sock].ftp_data_fd = make_client(fd[sock].ftp_peer_addr, fd[sock].ftp_peer_port)) < 0) {
panic("could not open ftp PORT data connection to client\n");
}
fd[fd[sock].ftp_data_fd].ftp_data_fd = sock;
fd[fd[sock].ftp_data_fd].state = STATE_FTP_DATA_READY;
if (verbose) {
printf("ftp PORT %d <-> %d\n", sock, fd[sock].ftp_data_fd);
}
}
n = (char *)memchr(buffer, '\n', fd[sock].req_pos);
if (!n) {
return 0;
}
if (send_ftp_data(sock, buffer + 5 /*, n */) < 0) {
errors++;
*n = 0;
if (verbose) {
printf("badly formed ftp request: %s\n", buffer);
}
return 1;
}
fd[sock].response = fd[sock].req_header;
fd[sock].length = sprintf(fd[sock].req_header, "150 %d bytes\r\n", fd[fd[sock].ftp_data_fd].length);
fd[sock].req_pos = 0;
fd[sock].response_length = strlen(fd[sock].req_header);
poll_set(sock, nullptr, write_ftp_response);
return 0;
} else {
if (verbose || verbose_errors) {
printf("ftp junk : %s\n", buffer);
}
fd[sock].req_pos = 0;
return 0;
}
}
}
static int
accept_sock(int sock)
{
struct sockaddr_in clientname;
int size = sizeof(clientname);
int new_fd = 0;
do {
new_fd = accept(sock, (struct sockaddr *)&clientname,
#if 0
&size
#else
(socklen_t *)&size
#endif
);
if (new_fd < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
if (errno == EINTR || errno == ECONNABORTED) {
continue;
}
printf("accept socket was %d\n", sock);
panic_perror("accept");
}
} while (new_fd < 0);
if (fcntl(new_fd, F_SETFL, O_NONBLOCK) < 0) {
panic_perror("fcntl");
}
#if 0
#ifdef BUFSIZE // make default
int bufsize = BUFSIZE;
if (setsockopt(new_fd,SOL_SOCKET,SO_SNDBUF,
(const char *)&bufsize,sizeof(bufsize)) < 0) {
perror("setsockopt");
}
if (setsockopt(new_fd,SOL_SOCKET,SO_SNDBUF,
(const char *)&bufsize,sizeof(bufsize)) < 0) {
perror("setsockopt");
}
#endif
#endif
int enable = 1;
if (setsockopt(new_fd, IPPROTO_TCP, TCP_NODELAY, (const char *)&enable, sizeof(enable)) < 0) {
perror("setsockopt");
}
#ifdef PRINT_LOCAL_PORT
struct sockaddr_in local_sa;
size = sizeof(local_sa);
getsockname(new_fd, (struct sockaddr *)&local_sa, &size);
printf("local_sa.sin_port = %d\n", local_sa.sin_port);
#endif
return new_fd;
}
static int
accept_compd(int sock)
{
int new_fd = accept_sock(sock);
servers++;
new_servers++;
poll_init_set(new_fd, nullptr, read_compd_request);
fd[new_fd].count = &servers;
fd[new_fd].start = now;
fd[new_fd].ready = now + server_delay * HRTIME_MSECOND;
fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX;
return 0;
}
static int
accept_read(int sock)
{
int new_fd = accept_sock(sock);
servers++;
new_servers++;
if (ftp) {
poll_init_set(new_fd, nullptr, write_ftp_response);
make_response(new_fd, 220);
} else {
poll_init_set(new_fd, read_request);
}
fd[new_fd].count = &servers;
fd[new_fd].start = now;
fd[new_fd].ready = now + server_delay * HRTIME_MSECOND;
fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX;
return 0;
}
static int
accept_ftp_data(int sock)
{
int new_fd = accept_sock(sock);
servers++;
new_servers++;
poll_init(new_fd);
fd[new_fd].ftp_data_fd = fd[sock].ftp_data_fd;
fd[fd[sock].ftp_data_fd].ftp_data_fd = new_fd;
fd[new_fd].state = STATE_FTP_DATA_READY;
fd[new_fd].count = &servers;
fd[new_fd].start = now;
fd[new_fd].ready = now + server_delay * HRTIME_MSECOND;
fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX;
fd[new_fd].state = STATE_FTP_DATA_READY;
fd[new_fd].doc = fd[sock].doc;
fd[new_fd].doc_length = fd[sock].doc_length;
if (verbose) {
printf("accept_ftp_data %d for %d\n", new_fd, sock);
}
send_ftp_data_when_ready(new_fd);
return 1;
}
static int
open_server(unsigned short int port, accept_fn_t accept_fn)
{
struct linger lngr;
int sock;
int one = 1;
/* Create the socket. */
sock = socket(AF_INET, SOCK_STREAM, 0);
if (sock < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
struct sockaddr_in &name = fd[sock].name;
/* Give the socket a name. */
name.sin_family = AF_INET;
name.sin_port = htons(port);
name.sin_addr.s_addr = htonl(INADDR_ANY);
if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)) < 0) {
perror((char *)"setsockopt");
exit(EXIT_FAILURE);
}
if (bind(sock, (struct sockaddr *)&name, sizeof(name)) < 0) {
if (errno == EADDRINUSE) {
close(sock);
return -EADDRINUSE;
}
perror("bind");
exit(EXIT_FAILURE);
}
int addrlen = sizeof(name);
if (getsockname(sock, (struct sockaddr *)&name,
#if 0
&addrlen
#else
(socklen_t *)&addrlen
#endif
) < 0) {
perror("getsockname");
exit(EXIT_FAILURE);
}
ink_assert(addrlen);
/* Tell the socket not to linger on exit */
lngr.l_onoff = 0;
lngr.l_linger = 0;
if (setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&lngr, sizeof(struct linger)) < 0) {
perror("setsockopt");
exit(EXIT_FAILURE);
}
if (listen(sock, 1024) < 0) {
perror("listen");
exit(EXIT_FAILURE);
}
/* put the socket in non-blocking mode */
if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) {
perror("fcntl");
exit(EXIT_FAILURE);
}
if (verbose) {
printf("opening server on %d port %d\n", sock, port);
}
poll_init_set(sock, accept_fn);
return sock;
}
// perform poll and invoke callbacks on active descriptors
static int
poll_loop()
{
if (server_fd > 0) {
while (read_ready(server_fd) > 0) {
accept_read(server_fd);
}
}
pollfd pfd[POLL_GROUP_SIZE];
int ip = 0;
now = ink_get_hrtime_internal();
for (int i = 0; i <= last_fd; i++) {
if (fd[i].fd > 0 && (!fd[i].ready || now >= fd[i].ready)) {
pfd[ip].fd = i;
pfd[ip].events = 0;
pfd[ip].revents = 0;
if (fd[i].read_cb) {
pfd[ip].events |= POLLIN;
}
if (fd[i].write_cb) {
pfd[ip].events |= POLLOUT;
}
ip++;
}
if (ip >= POLL_GROUP_SIZE || i == last_fd) {
int n = poll(pfd, ip, POLL_TIMEOUT);
if (n > 0) {
for (int j = 0; j < ip; j++) {
if (pfd[j].revents & (POLLIN | POLLERR | POLLHUP | POLLNVAL)) {
if (verbose) {
printf("poll read %d %X\n", pfd[j].fd, pfd[j].revents);
}
if (fd[pfd[j].fd].read_cb && fd[pfd[j].fd].read_cb(pfd[j].fd)) {
fd[pfd[j].fd].close();
continue;
}
}
if (pfd[j].revents & (POLLOUT | POLLERR | POLLHUP | POLLNVAL)) {
if (verbose) {
printf("poll write %d %X\n", pfd[j].fd, pfd[j].revents);
}
if (fd[pfd[j].fd].write_cb && fd[pfd[j].fd].write_cb(pfd[j].fd)) {
fd[pfd[j].fd].close();
continue;
}
}
}
}
ip = 0;
}
}
return 0;
}
static int
gen_bfc_dist(double f = 10.0)
{
if (docsize >= 0) {
return docsize;
}
double rand = 0.0;
double rand2 = 0.0;
bool f_given = f < 9.0;
if (!f_given) {
// coverity[dont_call]
rand = drand48();
// coverity[dont_call]
rand2 = drand48();
} else {
rand = f;
rand2 = (f * 13.0) - floor(f * 13.0);
}
int class_no;
int file_no = 0;
if (rand < 0.35) {
class_no = 0;
} else if (rand < 0.85) {
class_no = 1;
} else if (rand < 0.99) {
class_no = 2;
} else {
class_no = 3;
if (f_given) {
rand2 = (f * 113.0) - floor(f * 113.0);
}
}
if (rand2 < 0.018) {
file_no = 0;
} else if (rand2 < 0.091) {
file_no = 1;
} else if (rand2 < 0.237) {
file_no = 2;
} else if (rand2 < 0.432) {
file_no = 3;
} else if (rand2 < 0.627) {
file_no = 4;
} else if (rand2 < 0.783) {
file_no = 5;
} else if (rand2 < 0.887) {
file_no = 6;
} else if (rand2 < 0.945) {
file_no = 7;
} else if (rand2 < 1.000) {
file_no = 8;
}
int size = 100;
int i;
for (i = 0; i < class_no; i++) {
size = size * 10;
}
int increment = size;
size = size * (file_no + 1);
// vary about the mean doc size for
// that class/size
if (!f_given) {
// coverity[dont_call]
size += (int)((-increment * 0.5) + (increment * drand48()));
}
if (verbose) {
printf("gen_bfc_dist %d\n", size);
}
return size;
}
static void
build_response()
{
int maxsize = docsize > MAX_RESPONSE_LENGTH ? docsize : MAX_RESPONSE_LENGTH;
response_buffer = (char *)malloc(maxsize + HEADER_SIZE);
for (int i = 0; i < maxsize + HEADER_SIZE; i++) {
response_buffer[i] = i % 256;
}
}
static void
put_ka(int sock)
{
int i = 0;
for (; i < n_ka_cache; i++) {
if (!ka_cache_head[i] || fd[ka_cache_head[i]].ip == fd[sock].ip) {
goto Lpush;
}
}
i = n_ka_cache++;
Lpush:
if (ka_cache_tail[i]) {
fd[ka_cache_tail[i]].next = sock;
} else {
ka_cache_head[i] = sock;
}
ka_cache_tail[i] = sock;
}
static int
get_ka(unsigned int ip)
{
for (int i = 0; i < n_ka_cache; i++) {
if (fd[ka_cache_head[i]].ip == ip) {
int res = ka_cache_head[i];
ka_cache_head[i] = fd[ka_cache_head[i]].next;
if (res == ka_cache_tail[i]) {
ink_assert(!ka_cache_head[i]);
ka_cache_tail[i] = 0;
}
return res;
}
}
return -1;
}
static void
defer_url(char *url)
{
if (n_defered_urls < MAX_DEFERED_URLS - 1) {
defered_urls[n_defered_urls++] = strdup(url);
} else {
fprintf(stderr, "too many defered urls, dropping '%s'\n", url);
}
}
static int
throttling_connections()
{
return client_rate && keepalive_cons && current_clients >= keepalive_cons;
}
static void
done()
{
interval_report();
exit(0);
}
static int
is_done()
{
return (urls_mode && !current_clients && !n_defered_urls) || (bandwidth_test && bandwidth_test_to_go <= 0 && !current_clients);
}
static void
undefer_url(bool unthrottled)
{
if ((unthrottled || !throttling_connections()) && n_defered_urls) {
--n_defered_urls;
char *url = defered_urls[n_defered_urls];
make_url_client(url, 0, true, unthrottled);
free(url);
if (verbose) {
printf("undefer_url: made client %d clients\n", current_clients);
}
} else if (verbose) {
printf("undefer_url: throttle\n");
}
if (is_done()) {
done();
}
}
static void
init_client(int sock)
{
poll_init(sock);
fd[sock].start = now;
fd[sock].ready = now;
fd[sock].count = &clients;
poll_set(sock, nullptr, write_request);
}
static unsigned int
get_addr(const char *host)
{
unsigned int addr = inet_addr(host);
struct hostent *host_info = nullptr;
if (!addr || (-1 == (int)addr)) {
host_info = gethostbyname(host);
if (!host_info) {
printf("gethostbyname(%s): %s\n", host, hstrerror(h_errno));
return (unsigned int)-1;
}
addr = *((unsigned int *)host_info->h_addr);
}
return addr;
}
static char *
find_href_end(char *start, int len)
{
char *end = start;
if (!start) {
return nullptr;
}
while (*end && len > 0) {
if (*end == '\"') {
break; /* " */
}
if (*end == '\'') {
break;
}
if (*end == '>') {
break;
}
if (*end == ' ') {
break;
}
if (*end == '\t') {
break;
}
if (*end == '\n') {
break;
}
if (*end == '<') {
break;
}
if (*end & 0x80) {
break; /* hi order bit! */
}
len--;
end++;
}
if (*end == 0 || len == 0) {
return nullptr;
} else {
return end;
}
} // find_href_end
static char *
find_href_start(const char *tag, char *base, int len)
{
int taglen = strlen(tag);
if (base == nullptr) {
return nullptr;
}
char *start = base;
char *end = base + len;
Lagain : {
start = strncasestr(start, tag, len);
if ((start == nullptr) || (end - start < 6)) {
return nullptr;
}
start += taglen;
len -= taglen;
} // block
while (ParseRules::is_ws(*start) && (end - start > 1)) {
start++;
len--;
}
if (*start == '=' && (end - start > 1)) {
start++;
len--;
} else {
goto Lagain;
}
while (ParseRules::is_ws(*start) && (end - start > 1)) {
start++;
len--;
}
//
// Optional quotes: href="x" or href='x' or href=x
//
if ((*start == '\"' || (*start == '\'')) && (end - start > 1)) { /*"'*/
start++;
len--;
}
while (ParseRules::is_ws(*start) && (end - start > 1)) {
start++;
len--;
}
return start;
} // find_href_start
static int
compose_url(char *new_url, char *base, char *input)
{
char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512];
char curl[512];
int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash;
ink_web_decompose_url(base, sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag, &xquer, &xpar, &rel,
&slash);
strcpy(curl, "http://");
strcat(curl, host);
if (xport) {
strcat(curl, ":");
strcat(curl, port);
}
strcat(curl, "/");
strcat(curl, path);
ink_web_canonicalize_url(curl, input, new_url, 512);
return 0;
} // compose_urls
static void
compose_all_urls(const char *tag, char *buf, char *start, char *end, int buflen, char *base_url)
{
char old;
while ((start = find_href_start(tag, end, buflen - (end - buf)))) {
char newurl[512];
end = (char *)find_href_end(start, std::min(static_cast<int>(buflen - (start - buf)), 512 - 10));
if (!end) {
end = start + strlen(tag);
continue;
}
old = *end;
*end = 0;
compose_url(newurl, base_url, start);
make_url_client(newurl, base_url);
*end = old;
} // while
}
//
// Input is a nullptr-terminated string (buf of buflen)
// also, a read-write base_url
//
static void
extract_urls(char *buf, int buflen, char *base_url)
{
// if (verbose) printf("EXTRACT<<%s\n>>", buf);
char *start = nullptr;
char *end = nullptr;
char old_base[512] = {0};
strncpy(old_base, base_url, sizeof(old_base) - 1);
start = strncasestr(buf, "<base ", buflen);
if (start) {
end = (char *)memchr(start, '>', buflen - (start - buf));
if (end) {
char *rover = strncasestr(start, "href", end - start);
if (rover) {
rover += 4;
while (rover < end && (ParseRules::is_ws(*rover) || *rover == '=' || *rover == '\'' || *rover == '\"')) { /* " */
rover++;
}
start = rover;
while (rover < end && !(ParseRules::is_ws(*rover) || *rover == '\'' || *rover == '\"')) {
rover++;
}
*rover = 0;
compose_url(base_url, old_base, start);
// fixup unqualified hostnames (e.g. http://internal/foo)
char *he = strchr(base_url + 8, '/');
if (!memchr(base_url, '.', he - base_url)) {
char t[512] = {0};
strncpy(t, base_url, sizeof(t) - 1);
char *old_he = strchr(old_base + 8, '.');
if (old_he) {
char *old_hee = strchr(old_he, '/');
if (old_hee) {
memcpy(base_url, t, (he - base_url));
memcpy(base_url + (he - base_url), old_he, (old_hee - old_he));
memcpy(base_url + (he - base_url) + (old_hee - old_he), t + (he - base_url), strlen(t + (he - base_url)));
base_url[(he - base_url) + (old_hee - old_he) + strlen(t + (he - base_url))] = 0;
}
}
}
}
}
}
end = buf;
if (follow) {
compose_all_urls("href", buf, start, end, buflen, base_url);
}
if (fullpage) {
const char *tags[] = {
"src", "image", "object", "archive", "background",
// "location", "code"
};
for (unsigned i = 0; i < sizeof(tags) / sizeof(tags[0]); i++) {
compose_all_urls(tags[i], buf, start, end, buflen, base_url);
}
}
} // extract_urls
static void
follow_links(int sock)
{
if (urls_mode) {
if (fd[sock].binary) {
return;
}
int l = fd[sock].response_remaining;
char *r = fd[sock].response, *p = r, *n = r;
if (r) {
extract_urls(r, l, fd[sock].base_url);
}
if (l < MAX_BUFSIZE) {
while (n) {
n = (char *)memchr(p, '\n', l - (p - r));
if (!n) {
n = (char *)memchr(p, '\r', l - (p - r));
}
if (n) {
p = n + 1;
}
}
int done = p - r, remaining = l - done;
if (done) {
memmove(r, p, remaining);
fd[sock].response_remaining = remaining;
}
} else { // bail
fd[sock].response_length = 0;
}
}
}
static int
verify_content(int sock, char *buf, int done)
{
if ((urls_mode && !check_content) || range_mode) {
return 1;
}
int l = fd[sock].response_length;
char *d = response_buffer + (l % 256) + fd[sock].nalternate;
int left = fd[sock].length;
if (left > 0) {
if (embed_url && !fd[sock].jg_compressed) {
if (l == left && left > 64) {
char *url_end = nullptr, *url_start = nullptr;
get_path_from_req(fd[sock].base_url, &url_start, &url_end);
if (url_end - url_start < done) {
if (memcmp(url_start, buf, url_end - url_start)) {
return 0;
}
}
}
// skip past the URL which is embedded in the document
// to confound the fingerprinting code
if (l - left < 64) {
int skip = 64 - (l - left);
left -= skip;
done -= skip;
buf += skip;
if (done < 0) {
done = 0;
}
}
}
if (!check_content) {
return 1;
}
if (done > left) {
done = left;
}
if (memcmp(buf, d + (fd[sock].response_length - left), done)) {
return 0;
}
}
return 1;
}
#define ZIPF_SIZE (1 << 20)
static double *zipf_table = nullptr;
static void
build_zipf()
{
zipf_table = (double *)malloc(ZIPF_SIZE * sizeof(double));
for (int i = 0; i < ZIPF_SIZE; i++) {
zipf_table[i] = 1.0 / pow(i + 2, zipf);
}
for (int i = 1; i < ZIPF_SIZE; i++) {
zipf_table[i] = zipf_table[i - 1] + zipf_table[i];
}
double x = zipf_table[ZIPF_SIZE - 1];
for (int i = 0; i < ZIPF_SIZE; i++) {
zipf_table[i] = zipf_table[i] / x;
}
}
static int
get_zipf(double v)
{
int l = 0, r = ZIPF_SIZE - 1, m;
do {
m = (r + l) / 2;
if (v < zipf_table[m]) {
r = m - 1;
} else {
l = m + 1;
}
} while (l < r);
if (zipf_bucket_size == 1) {
return m;
}
double x = zipf_table[m], y = zipf_table[m + 1];
m += static_cast<int>((v - x) / (y - x));
return m;
}
static int
read_response_error(int sock)
{
errors++;
fd[sock].close();
if (!urls_mode) {
make_bfc_client(proxy_addr, proxy_port);
}
return 0;
}
static int
read_response(int sock)
{
int err = 0;
if (fd[sock].req_pos >= 0) {
if (!fd[sock].req_pos) {
memset(fd[sock].req_header, 0, HEADER_SIZE);
}
do {
int l = HEADER_SIZE - fd[sock].req_pos - 1;
if (l <= 0) {
if (verbose || verbose_errors) {
// coverity[string_null_argument]
printf("header too long '%s'", fd[sock].req_header);
}
return read_response_error(sock);
}
err = read(sock, fd[sock].req_header + fd[sock].req_pos, HEADER_SIZE - fd[sock].req_pos - 1);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
if (verbose_errors) {
printf("read_response %d closed during header for '%s' after %d%s\n", sock, fd[sock].base_url, fd[sock].req_pos,
(keepalive && (fd[sock].keepalive != keepalive) && !fd[sock].req_pos) ? " -- keepalive timeout" : "");
}
return read_response_error(sock);
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
if (errno == ECONNRESET) {
if (!fd[sock].req_pos && keepalive > 0 && fd[sock].keepalive != keepalive) {
fd[sock].close();
if (!urls_mode) {
make_bfc_client(proxy_addr, proxy_port);
}
return 0;
}
if (verbose || verbose_errors) {
perror("read");
}
goto Ldone;
}
panic_perror("read");
}
if (verbose) {
printf("read %d header %d [%s]\n", sock, err, fd[sock].req_header);
}
b1_ops++;
strcpy(fd[sock].response_header, fd[sock].req_header);
b1latency += (int)elapsed_from_start(sock);
new_cbytes += err;
new_tbytes += err;
fd[sock].req_pos += err;
fd[sock].bytes += err;
fd[sock].active = ink_get_hrtime_internal();
int total_read = fd[sock].req_pos;
char *p = fd[sock].req_header;
char *cl = nullptr;
int cli = 0;
while ((p = strchr(p, '\n'))) {
if (verbose) {
printf("read header end? [%s]\n", p);
}
if (p[1] == '\n' || (p[1] == '\r' && p[2] == '\n')) {
int off = 1 + (p[1] == '\r' ? 2 : 1);
p += off;
strncpy(fd[sock].response_header, fd[sock].req_header, p - fd[sock].req_header);
fd[sock].response_header[p - fd[sock].req_header] = '\0';
int lbody = fd[sock].req_pos - (p - fd[sock].req_header);
cl = strncasestr(fd[sock].req_header, "Content-Length:", p - fd[sock].req_header);
if (cl) {
cli = atoi(cl + 16);
int expected_length = fd[sock].response_length;
if (compd_suite) {
if (strstr(fd[sock].req_header, "x-jg")) {
fd[sock].jg_compressed = 1;
expected_length = (fd[sock].response_length * 2) / 3;
}
}
if (fd[sock].response_length && verbose_errors && expected_length != cli && !nocheck_length) {
fprintf(stderr, "bad Content-Length expected %d got %d orig %d\n", expected_length, cli, fd[sock].response_length);
}
fd[sock].response_length = fd[sock].length = cli;
}
if (fd[sock].req_header[9] == '2') {
if (!verify_content(sock, p, lbody)) {
if (verbose || verbose_errors) {
printf("content verification error '%s'\n", fd[sock].base_url);
}
return read_response_error(sock);
}
}
total_proxy_response_body_bytes += lbody;
total_proxy_response_header_bytes += p - fd[sock].req_header;
fd[sock].length -= lbody;
ink_assert(fd[sock].length >= 0);
fd[sock].req_pos = -1;
// coverity[dont_call]
if (fd[sock].length && drand48() < client_abort_rate) {
fd[sock].client_abort = 1;
// coverity[dont_call]
fd[sock].length = (int)(drand48() * (fd[sock].length - 1));
fd[sock].keepalive = 0;
fd[sock].drop_after_CL = 1;
}
if (verbose) {
printf("read %d header done\n", sock);
}
break;
}
p++;
}
if (!p) {
return 0;
}
int hlen = p - fd[sock].req_header;
if (show_headers) {
printf("Response From Proxy: {\n");
for (char *c = fd[sock].req_header; c < p; c++) {
putc(*c, stdout);
}
printf("}\n");
}
if (obey_redirects && urls_mode && fd[sock].req_header[9] == '3' && fd[sock].req_header[10] == '0' &&
(fd[sock].req_header[11] == '1' || fd[sock].req_header[11] == '2')) {
char *redirect = strstr(fd[sock].req_header, "http://");
char *e = redirect ? (char *)memchr(redirect, '\n', hlen) : 0;
if (!redirect || !e) {
fprintf(stderr, "bad redirect '%s'", fd[sock].req_header);
} else {
if (e[-1] == '\r') {
e--;
}
*e = 0;
make_url_client(redirect);
}
fd[sock].close();
return 0;
}
if (fd[sock].req_header[9] != '2') {
if (verbose_errors) {
char *e = (char *)memchr(fd[sock].req_header, '\r', hlen);
if (e) {
*e = 0;
} else {
char *e = (char *)memchr(fd[sock].req_header, '\n', hlen);
if (e) {
*e = 0;
} else {
*p = 0;
}
}
printf("error response %d after %dms: '%s':'%s' %lu-%lu\n", sock, (int)elapsed_from_start(sock), fd[sock].base_url,
fd[sock].req_header, fd[sock].range_start, fd[sock].range_end);
}
return read_response_error(sock);
}
char *r = fd[sock].req_header;
int length = p - r;
char *ka = check_keepalive(r, length);
if (urls_mode) {
fd[sock].response_remaining = total_read - length;
if (fd[sock].response_remaining) {
memcpy(fd[sock].response, p, fd[sock].response_remaining);
}
if (check_content && !cl) {
if (verbose || verbose_errors) {
printf("missing Content-Length '%s'\n", fd[sock].base_url);
}
return read_response_error(sock);
}
} else {
fd[sock].response = 0;
}
if (!cl || !ka) {
fd[sock].keepalive = -1;
}
if (!cl) {
fd[sock].length = INT_MAX;
}
}
if (fd[sock].length <= 0 && (fd[sock].keepalive > 0 || fd[sock].drop_after_CL)) {
goto Ldone;
}
{
char *r = nullptr;
char buf[MAX_BUFSIZE];
int toread = cbuffersize;
if (urls_mode) {
if (fd[sock].response_remaining + cbuffersize < MAX_BUFSIZE) {
r = fd[sock].response + fd[sock].response_remaining;
} else {
toread = MAX_BUFSIZE - fd[sock].response_remaining;
if (!toread) {
if (verbose_errors || verbose) {
fprintf(stderr, "line exceeds buffer, unable to follow links\n");
}
toread = cbuffersize;
r = fd[sock].response;
fd[sock].response_remaining = 0;
} else {
r = fd[sock].response + fd[sock].response_remaining;
}
}
} else {
r = buf;
}
if (fast(sock, client_speed, fd[sock].bytes)) {
return 0;
}
if (fd[sock].bytes > abort_retry_bytes && (((now - fd[sock].start + 1) / HRTIME_SECOND) > abort_retry_secs) &&
!faster_than(sock, abort_retry_speed, fd[sock].bytes)) {
fd[sock].client_abort = 1;
fd[sock].keepalive = 0;
if (!urls_mode && !client_rate) {
make_bfc_client(proxy_addr, proxy_port);
}
goto Ldone;
}
do {
err = read(sock, r, toread);
} while ((err == -1) && (errno == EINTR));
if (err < 0) {
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
if (errno == ECONNRESET) {
if (verbose || verbose_errors) {
perror("read");
}
goto Ldone;
}
panic_perror("read");
}
if (!err) {
goto Ldone;
}
if (!verify_content(sock, buf, err)) {
if (verbose || verbose_errors) {
printf("content verification error '%s'\n", fd[sock].base_url);
}
return read_response_error(sock);
}
total_proxy_response_body_bytes += err;
new_cbytes += err;
new_tbytes += err;
fd[sock].response_remaining += err;
fd[sock].bytes += err;
follow_links(sock);
if (fd[sock].length != INT_MAX) {
fd[sock].length -= err;
}
fd[sock].active = ink_get_hrtime_internal();
if (verbose) {
printf("read %d got %d togo %d %d %d\n", sock, err, fd[sock].length, fd[sock].keepalive, fd[sock].drop_after_CL);
}
}
if (fd[sock].length <= 0 && (fd[sock].keepalive > 0 || fd[sock].drop_after_CL)) {
goto Ldone;
}
return 0;
Ldone:
if (!fd[sock].client_abort && !(server_abort_rate > 0) && fd[sock].length && fd[sock].length != INT_MAX) {
if (verbose || verbose_errors) {
printf("bad length %d wanted %d after %d ms: '%s'\n", fd[sock].response_length - fd[sock].length, fd[sock].response_length,
(int)((ink_get_hrtime_internal() - fd[sock].active) / HRTIME_MSECOND), fd[sock].base_url);
}
return read_response_error(sock);
}
if (verbose) {
printf("read %d done\n", sock);
}
new_ops++;
double thislatency = elapsed_from_start(sock);
latency += (int)thislatency;
lat_ops++;
if (fd[sock].keepalive > 0) {
fd[sock].reset();
put_ka(sock);
current_clients--;
if (urls_mode) {
undefer_url();
return 0;
}
} else {
fd[sock].close();
}
if (!urls_mode && !client_rate) {
make_bfc_client(proxy_addr, proxy_port);
}
return 0;
}
static int
write_request(int sock)
{
int err = 0;
// send request header
if (!fd[sock].send_header) {
do {
err = write(sock, fd[sock].req_header + fd[sock].req_pos, fd[sock].length - fd[sock].req_pos);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
if (verbose_errors) {
printf("write %d closed early\n", sock);
}
goto Lerror;
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("write");
goto Lerror;
}
if (verbose) {
printf("write %d %d\n", sock, err);
}
new_tbytes += err;
total_client_request_bytes += err;
fd[sock].req_pos += err;
fd[sock].active = ink_get_hrtime_internal();
if (fd[sock].req_pos >= fd[sock].length) {
if (verbose) {
printf("write request header complete %d %d\n", sock, fd[sock].length);
}
fd[sock].req_pos = 0;
fd[sock].length = fd[sock].response_length;
if (!post_support || !fd[sock].post_size) {
poll_set(sock, read_response);
return 0;
}
fd[sock].send_header = 1;
}
}
// send request body
ink_assert(MAX_RESPONSE_LENGTH > fd[sock].post_size);
if (fd[sock].send_header) {
do {
err = write(sock, response_buffer + fd[sock].req_pos, fd[sock].post_size - fd[sock].req_pos);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
if (verbose_errors) {
printf("write %d closed early\n", sock);
}
goto Lerror;
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("write");
goto Lerror;
}
if (verbose) {
printf("write %d %d\n", sock, err);
}
new_tbytes += err;
total_client_request_bytes += err;
fd[sock].req_pos += err;
fd[sock].active = ink_get_hrtime_internal();
if (fd[sock].req_pos >= fd[sock].post_size) {
if (verbose) {
printf("write request body complete %d %d\n", sock, fd[sock].length);
}
fd[sock].send_header = 0;
fd[sock].req_pos = 0;
fd[sock].length = fd[sock].response_length;
poll_set(sock, read_response);
}
}
return 0;
Lerror:
errors++;
#ifndef RETRY_CLIENT_WRITE_ERRORS
if (!--nclients) {
panic("no more clients\n");
}
return 1;
#else
if (!urls_mode)
make_bfc_client(proxy_host, proxy_port);
fd[sock].close();
return 0;
#endif
}
static int
write_ftp_response(int sock)
{
int err = 0;
do {
err = write(sock, fd[sock].req_header + fd[sock].req_pos, fd[sock].length - fd[sock].req_pos);
} while ((err == -1) && (errno == EINTR));
if (err <= 0) {
if (!err) {
if (verbose_errors) {
printf("write %d closed early\n", sock);
}
goto Lerror;
}
if (errno == EAGAIN || errno == ENOTCONN) {
return 0;
}
perror("write");
goto Lerror;
}
if (verbose) {
printf("write %d %d\n", sock, err);
}
new_tbytes += err;
fd[sock].req_pos += err;
if (fd[sock].req_pos >= fd[sock].length) {
if (verbose) {
printf("write complete %d %d\n", sock, fd[sock].length);
}
fd[sock].req_pos = 0;
fd[sock].length = fd[sock].response_length;
poll_set(sock, read_ftp_request);
}
return 0;
Lerror:
errors++;
return 1;
}
static int
make_client(unsigned int addr, int port)
{
struct linger lngr;
int sock = socket(PF_INET, SOCK_STREAM, 0);
if (sock < 0) {
panic_perror("socket");
}
if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) {
panic_perror("fcntl");
}
/* tweak buffer size so that remote end can't close connection too fast */
#if 0
int bufsize = cbuffersize;
if (setsockopt(sock,SOL_SOCKET,SO_RCVBUF,
(const char *)&bufsize,sizeof(bufsize)) < 0)
panic_perror("setsockopt");
if (setsockopt(sock,SOL_SOCKET,SO_SNDBUF,
(const char *)&bufsize,sizeof(bufsize)) < 0)
panic_perror("setsockopt");
#endif
int enable = 1;
if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (const char *)&enable, sizeof(enable)) < 0) {
panic_perror("setsockopt");
}
/* Tell the socket not to linger on exit */
lngr.l_onoff = 1;
lngr.l_linger = 0;
if (!ftp) { // this causes problems for PORT ftp -- ewong
if (setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&lngr, sizeof(struct linger)) < 0) {
perror("setsockopt");
exit(EXIT_FAILURE);
}
}
/* Give the socket a name. */
struct sockaddr_in name;
memset(&name, 0, sizeof(sockaddr_in));
name.sin_family = AF_INET;
name.sin_port = htons(port);
name.sin_addr.s_addr = addr;
if (verbose) {
printf("connecting to %u.%u.%u.%u:%d\n", ((unsigned char *)&addr)[0], ((unsigned char *)&addr)[1], ((unsigned char *)&addr)[2],
((unsigned char *)&addr)[3], port);
}
while (connect(sock, (struct sockaddr *)&name, sizeof(name)) < 0) {
if (errno == EINTR) {
continue;
}
if (errno == EINPROGRESS) {
break;
}
if (verbose_errors) {
fprintf(stderr, "connect failed errno = %d\n", errno);
}
errors++;
close(sock);
return -1;
}
init_client(sock);
fd[sock].ip = addr;
clients++;
current_clients++;
new_clients++;
return sock;
}
static void
make_range_header(int sock, double dr, char *rbuf, int size_limit)
{
int tmp[3];
if (!rbuf || !size_limit)
return;
tmp[0] = gen_bfc_dist(dr - 1.0);
// coverity[dont_call]
tmp[1] = ((int)(drand48() * 1000000)) % (tmp[0] - 1 - 0 + 1);
// coverity[dont_call]
tmp[2] = ((int)(drand48() * 1000000)) % (tmp[0] - 1 - 0 + 1) + tmp[1] + 100;
if (tmp[0] > 100) {
if (tmp[0] <= tmp[2]) {
tmp[2] = tmp[0] - 1;
}
if (tmp[2] - tmp[1] < 100) {
tmp[1] = tmp[2] - 100;
}
} else {
tmp[1] = 0;
tmp[2] = 99;
}
fd[sock].response_length = tmp[0];
fd[sock].range_start = tmp[1] > tmp[2] ? tmp[2] : tmp[1];
fd[sock].range_end = tmp[1] < tmp[2] ? tmp[2] : tmp[1];
ink_assert((int)(fd[sock].range_end - fd[sock].range_start + 1) >= 100);
snprintf(rbuf, size_limit, "Range: bytes=%lu-%lu\r\n", fd[sock].range_start, fd[sock].range_end);
}
static void
make_random_url(int sock, double *dr, double *h)
{
// coverity[dont_call]
*dr = drand48();
// coverity[dont_call]
*h = drand48();
if (zipf == 0.0) {
if (*h < hitrate) {
*dr = 1.0 + (floor(*dr * hotset) / hotset);
fd[sock].response_length = gen_bfc_dist(*dr - 1.0);
} else
fd[sock].response_length = gen_bfc_dist(*dr);
} else {
unsigned long long int doc = get_zipf(*dr);
// Some large randomish number.
unsigned long long int doc_len_int = doc * 0x14A4D0FB0E93E3A7LL;
unsigned long int x = doc_len_int;
double y = (double)x;
y /= 0x100000000LL; // deterministic random number between 0 and 1.0
fd[sock].response_length = gen_bfc_dist(y);
*dr = doc;
range_mode = 0;
}
}
static int
make_nohost_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *rbuf,
const char *cookie)
{
int post_length = 0;
switch (post_support) {
case 0:
if (range_mode) {
sprintf(fd[sock].req_header,
"GET http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n"
"%s"
"%s"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie);
} else {
sprintf(fd[sock].req_header,
ftp ? "GET ftp://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n" :
"GET http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, cookie);
}
break;
case 1:
if (range_mode) {
sprintf(fd[sock].req_header,
"POST http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n"
"Content-Length: %d\r\n"
"%s"
"%s"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension, fd[sock].response_length,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie);
} else {
sprintf(fd[sock].req_header,
"POST http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n"
"Content-Length: %d\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension, fd[sock].response_length,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, cookie);
}
post_length = fd[sock].response_length;
break;
case 2:
if (!post_size)
ink_assert(!"post_size should never be zero!");
if (range_mode) {
sprintf(fd[sock].req_header,
"POST http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n"
"Content-Length: %d\r\n"
"%s"
"%s"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension, post_size,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie);
} else {
sprintf(fd[sock].req_header,
"POST http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n"
"Content-Length: %d\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n",
local_host, server_port, dr, fd[sock].response_length, evo_str, extension, post_size,
fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, cookie);
}
post_length = post_size;
break;
}
return post_length;
}
static int
make_host1_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *cookie)
{
sprintf(fd[sock].req_header,
"GET /%12.10f/%d%s%s HTTP/1.0\r\n"
"Host: %s:%d\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n",
dr, fd[sock].response_length, evo_str, extension, local_host, server_port,
fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, cookie);
return 0;
}
static int
make_host2_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *cookie)
{
/* Send a non-proxy client request i.e. for Transparency testing */
sprintf(fd[sock].req_header,
"GET /%12.10f/%d%s%s HTTP/1.0\r\n"
"%s"
"%s"
"%s"
"%s"
"\r\n",
dr, fd[sock].response_length, evo_str, extension, fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "",
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", eheaders, cookie);
return 0;
}
static int
build_request(int sock)
{
double dr, h;
char rbuf[1024];
make_random_url(sock, &dr, &h);
if (verbose) {
printf("gen_bfc_dist %d\n", fd[sock].response_length);
}
if (range_mode) {
make_range_header(sock, dr, rbuf, 1024);
}
char eheaders[16384];
*eheaders = 0;
int nheaders = extra_headers;
if (nheaders > 0) {
char *eh = eheaders;
if (!vary_user_agent) {
eh += sprintf(eh, "User-Agent: Mozilla/4.04 [en] (X11; I; Linux 2.0.31 i586)\r\n");
nheaders--;
}
if (nheaders > 0) {
eh += sprintf(eh, "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n");
}
while (--nheaders > 0) {
eh += sprintf(eh, "Extra-Header%d: a lot of junk for header %d\r\n", nheaders, nheaders);
}
}
char cookie[256];
*cookie = 0;
// coverity[dont_call]
fd[sock].nalternate = (int)(alternates * drand48());
if (alternates) {
if (!vary_user_agent) {
sprintf(cookie, "Cookie: jtest-cookie-%d\r\n", fd[sock].nalternate);
} else {
sprintf(cookie, "User-Agent: jtest-browser-%d\r\n", fd[sock].nalternate);
}
}
const char *extension;
switch (request_extension) {
case 1:
extension = ".html";
break;
case 2:
extension = ".jpeg";
break;
case 3:
extension = "/";
break;
default:
extension = (compd_suite ? ".jpeg" : "");
}
char evo_str[20];
evo_str[0] = '\0';
if (evo_rate != 0.0) {
double evo_index = dr + (((double)now) / HRTIME_HOUR) * evo_rate;
sprintf(evo_str, ".%u", ((unsigned int)evo_index));
}
int post_body = 0;
switch (hostrequest) {
case 0:
post_body = make_nohost_request(sock, dr, evo_str, extension, eheaders, rbuf, cookie);
break;
case 1:
post_body = make_host1_request(sock, dr, evo_str, extension, eheaders, cookie);
break;
case 2:
post_body = make_host2_request(sock, dr, evo_str, extension, eheaders, cookie);
break;
default:
ink_release_assert(!"Unexpected hostrequest! Abort.");
return 0;
}
if (range_mode) {
fd[sock].response_length = fd[sock].range_end - fd[sock].range_start + 1;
ink_assert(fd[sock].response_length > 0);
}
return post_body;
}
static void
make_bfc_client(unsigned int addr, int port)
{
int sock = -1;
char rbuf[1024];
memset(rbuf, 0, 1024);
if (bandwidth_test && bandwidth_test_to_go-- <= 0) {
return;
}
if (keepalive) {
sock = get_ka(addr);
}
if (sock < 0) {
sock = make_client(addr, port);
fd[sock].keepalive = keepalive;
} else {
init_client(sock);
current_clients++;
fd[sock].keepalive--;
}
if (sock < 0) {
panic("unable to open client connection\n");
}
fd[sock].post_size = build_request(sock);
if (verbose) {
printf("request %d [%s]\n", sock, fd[sock].req_header);
}
fd[sock].length = strlen(fd[sock].req_header);
{
char *s = fd[sock].req_header;
char *e = (char *)memchr(s, '\r', 512);
char *url = fd[sock].base_url;
memcpy(url, s, e - s);
url[e - s] = 0;
if (show_before) {
printf("%s\n", url);
}
}
if (show_headers) {
printf("Request to Proxy: {\n%s}\n", fd[sock].req_header);
}
}
#define RUNNING(_n) \
total_##_n = (((total_##_n * (average_over - 1)) / average_over) + new_##_n); \
running_##_n = total_##_n / average_over; \
new_##_n = 0;
#define RUNNING_AVG(_t, _n, _o) \
_t = _o ? ((_t * (average_over - 1) + _n / _o) / average_over) : _t; \
_n = 0;
void
interval_report()
{
static int here = 0;
now = ink_get_hrtime_internal();
if (!(here++ % 20)) {
printf(" con new ops 1B lat bytes/per svrs new ops total time err\n");
}
RUNNING(clients);
RUNNING_AVG(running_latency, latency, lat_ops);
lat_ops = 0;
RUNNING_AVG(running_b1latency, b1latency, b1_ops);
b1_ops = 0;
RUNNING(cbytes);
RUNNING(ops);
RUNNING(servers);
RUNNING(sops);
RUNNING(tbytes);
float t = (float)(now - start_time);
uint64_t per = current_clients ? running_cbytes / current_clients : 0;
printf("%4d %4d %7.1f %4d %4d %10" PRIu64 "/%-6" PRIu64 " %4d %4d %4d %9" PRIu64 " %6.1f %4d\n",
current_clients, // clients, n_ka_cache,
running_clients, running_ops, running_b1latency, running_latency, running_cbytes, per, running_servers, running_servers,
running_sops, running_tbytes, t / ((float)HRTIME_SECOND), errors);
if (is_done()) {
printf("Total Client Request Bytes:\t\t%" PRIu64 "\n", total_client_request_bytes);
printf("Total Server Response Header Bytes:\t%" PRIu64 "\n", total_server_response_header_bytes);
printf("Total Server Response Body Bytes:\t%" PRIu64 "\n", total_server_response_body_bytes);
printf("Total Proxy Request Bytes:\t\t%" PRIu64 "\n", total_proxy_request_bytes);
printf("Total Proxy Response Header Bytes:\t%" PRIu64 "\n", total_proxy_response_header_bytes);
printf("Total Proxy Response Body Bytes:\t%" PRIu64 "\n", total_proxy_response_body_bytes);
}
}
#define URL_HASH_ENTRIES url_hash_entries
#define BYTES_PER_ENTRY 3
#define ENTRIES_PER_BUCKET 16
#define OVERFLOW_ENTRIES 1024 // many many
#define BUCKETS (URL_HASH_ENTRIES / ENTRIES_PER_BUCKET)
#define BYTES_PER_BUCKET (BYTES_PER_ENTRY * ENTRIES_PER_BUCKET)
#define URL_HASH_BYTES (BYTES_PER_ENTRY * (URL_HASH_ENTRIES + OVERFLOW_ENTRIES))
// NOTE: change to match BYTES_PER_ENTRY
#define ENTRY_TAG(_x) (((unsigned int)_x[0] << 16) + ((unsigned int)_x[1] << 8) + (unsigned int)_x[2])
#define SET_ENTRY_TAG(_x, _t) \
_x[0] = _t >> 16; \
_x[1] = (_t >> 8) & 0xFF; \
_x[2] = _t & 0xFF;
#define MASK_TAG(_x) (_x & ((1U << (BYTES_PER_ENTRY * 8)) - 1))
#define BEGIN_HASH_LOOP \
unsigned int bucket = (i % BUCKETS); \
unsigned int tag = MASK_TAG((unsigned int)(i / BUCKETS)); \
if (!tag) \
tag++; \
unsigned char *base = bytes + bucket * BYTES_PER_BUCKET; \
unsigned char *last = bytes + (bucket + 1) * BYTES_PER_BUCKET - BYTES_PER_ENTRY; \
(void)last; \
\
for (unsigned int x = 0; x < ENTRIES_PER_BUCKET; x++) { \
unsigned char *e = base + x * BYTES_PER_ENTRY;
#define BEGIN_OVERFLOW_HASH_LOOP \
for (unsigned int j = 0; j < ENTRIES_PER_BUCKET; j++) { \
unsigned char *e = base + (URL_HASH_ENTRIES + j) * BYTES_PER_ENTRY;
#define END_HASH_LOOP }
struct UrlHashTable {
unsigned int numbytes;
unsigned char *bytes;
int fd;
void
zero()
{
memset(bytes, 0, numbytes);
}
void alloc(unsigned int want);
void
set(uint64_t i)
{
BEGIN_HASH_LOOP
{
if (!ENTRY_TAG(e)) {
SET_ENTRY_TAG(e, tag);
return;
}
}
END_HASH_LOOP;
fprintf(stderr, "url hash table overflow: %X, %X\n", (int)(base - bytes), tag);
BEGIN_OVERFLOW_HASH_LOOP
{
if (!ENTRY_TAG(e)) {
SET_ENTRY_TAG(e, tag);
return;
}
}
END_HASH_LOOP;
ink_fatal("overview entries overflow");
}
void
clear(uint64_t i)
{
BEGIN_HASH_LOOP
{
if (ENTRY_TAG(e) == tag) {
if (e != last) {
SET_ENTRY_TAG(e, ENTRY_TAG(last));
}
SET_ENTRY_TAG(last, 0);
return;
}
}
END_HASH_LOOP;
fprintf(stderr, "url hash table entry to clear not found: %X, %X\n", (int)(base - bytes), tag);
}
int
is_set(uint64_t i)
{
BEGIN_HASH_LOOP
{
if (ENTRY_TAG(e) == tag) {
return 1;
}
}
END_HASH_LOOP;
if (ENTRY_TAG((last))) {
BEGIN_OVERFLOW_HASH_LOOP
{
if (ENTRY_TAG(e) == tag) {
return 1;
}
}
END_HASH_LOOP;
}
return 0;
}
UrlHashTable();
~UrlHashTable();
};
UrlHashTable *uniq_urls = nullptr;
UrlHashTable::UrlHashTable() : numbytes(0), bytes(nullptr), fd(-1)
{
off_t len = 0;
if (!url_hash_entries) {
return;
}
if (*url_hash_filename) {
if ((fd = open(url_hash_filename, O_RDWR | O_CREAT, 0644)) == -1) {
panic_perror("failed to open URL Hash file");
}
len = lseek(fd, 0, SEEK_END);
}
if (url_hash_entries > 0) {
// if they specify the number of entries round it up
url_hash_entries = (url_hash_entries + ENTRIES_PER_BUCKET - 1) & ~(ENTRIES_PER_BUCKET - 1);
numbytes = URL_HASH_BYTES;
// ensure it is either a new file or the correct size
if (len != 0 && len != numbytes) {
panic("specified size != file size\n");
}
} else {
// otherwise make sure the file is non-zero and then use its
// size as the size
if (!len) {
panic("zero size URL Hash Table\n");
}
if (len != URL_HASH_BYTES) {
fprintf(stderr, "FATAL: hash file length (%jd) != URL_HASH_BYTES (%jd)\n", (intmax_t)len, (intmax_t)URL_HASH_BYTES);
exit(1);
}
numbytes = len;
}
if (*url_hash_filename) {
if (ftruncate(fd, numbytes) == -1) {
panic_perror("unable to truncate URL Hash file");
}
bytes = (unsigned char *)mmap(nullptr, numbytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (bytes == (unsigned char *)MAP_FAILED || !bytes) {
panic("unable to map URL Hash file\n");
}
} else {
bytes = (unsigned char *)malloc(numbytes);
ink_assert(bytes);
zero();
}
} // UrlHashTable::UrlHashTable
UrlHashTable::~UrlHashTable()
{
if (bytes) {
munmap((char *)bytes, numbytes);
}
if (fd != -1) {
close(fd);
}
} // UrlHashTable::~UrlHashTable
static int
seen_it(char *url)
{
if (!url_hash_entries) {
return 0;
}
int l = 0;
char *para = strrchr(url, '#');
if (para) {
l = para - url;
} else {
l = strlen(url);
}
CryptoHash hash;
CryptoContext().hash_immediate(hash, reinterpret_cast<void *>(url), l);
uint64_t x = hash.fold();
if (uniq_urls->is_set(x)) {
if (verbose) {
printf("YES: seen it '%s'\n", url);
}
return 1;
}
uniq_urls->set(x);
if (verbose) {
printf("NO: marked it '%s'\n", url);
}
return 0;
}
static int
make_url_client(const char *url, const char *base_url, bool seen, bool unthrottled)
{
int iport = 80;
unsigned int ip = 0;
char curl[512] = {0};
char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512];
int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash;
if (base_url) {
ink_web_canonicalize_url(base_url, url, curl, 512);
// hack for our own web server!
if (curl[strlen(curl) - 1] == 13) {
curl[strlen(curl) - 1] = 0;
}
if (curl[strlen(curl) - 1] == 12) {
curl[strlen(curl) - 1] = 0;
}
} else {
strncpy(curl, url, sizeof(curl) - 1);
}
if (!seen && seen_it(curl)) {
return -1;
}
ink_web_decompose_url(curl, sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag, &xquer, &xpar, &rel,
&slash);
if (follow_same) {
if (!xhost || strcasecmp(host, current_host)) {
if (verbose) {
printf("skipping %s\n", curl);
}
return -1;
}
}
if (!unthrottled && throttling_connections()) {
defer_url(curl);
return -1;
}
if (proxy_port) {
iport = proxy_port;
ip = proxy_addr;
} else {
if (xport) {
iport = ts::svtoi(port);
}
if (!xhost) {
if (verbose) {
fprintf(stderr, "bad url '%s'\n", curl);
}
return -1;
}
ip = get_addr(host);
if ((int)ip == -1) {
if (verbose || verbose_errors) {
fprintf(stderr, "bad host '%s'\n", host);
}
return -1;
}
}
int sock = -1;
if (keepalive) {
sock = get_ka(ip);
}
if (sock < 0) {
sock = make_client(ip, iport);
fd[sock].keepalive = keepalive;
} else {
init_client(sock);
current_clients++;
fd[sock].keepalive--;
}
if (sock < 0) {
panic("cannot make client\n");
}
char eheaders[16384];
*eheaders = 0;
int nheaders = extra_headers;
memset(&eheaders, 0, 16384);
if (nheaders > 0) {
char *eh = eheaders;
if (!vary_user_agent) {
eh += sprintf(eh, "User-Agent: Mozilla/4.04 [en] (X11; I; Linux 2.0.31 i586)\r\n");
nheaders--;
}
if (nheaders > 0) {
eh += sprintf(eh, "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n");
}
while (--nheaders > 0) {
eh += sprintf(eh, "Extra-Header%d: a lot of junk for header %d\r\n", nheaders, nheaders);
}
}
if (proxy_port) {
sprintf(fd[sock].req_header,
"GET %s HTTP/1.0\r\n"
"%s"
"%s"
"Accept: */*\r\n"
"%s"
"\r\n",
curl,
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "",
eheaders);
} else {
sprintf(fd[sock].req_header,
"GET /%s%s%s%s%s HTTP/1.0\r\n"
"Host: %s\r\n"
"%s"
"%s"
"Accept: */*\r\n"
"%s"
"\r\n",
path, xquer ? "?" : "", quer, xpar ? ";" : "", para, host,
// coverity[dont_call]
reload_rate > drand48() ? "Pragma: no-cache\r\n" : "", fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "",
eheaders);
}
if (verbose) {
printf("curl = '%s'\n", curl);
}
if (show_before) {
printf("%s\n", curl);
}
if (urlsdump_fp) {
fprintf(urlsdump_fp, "%s\n", curl);
}
if (show_headers) {
printf("Request to Proxy: {\n%s}\n", fd[sock].req_header);
}
{
const char *ext = strrchr(path, '.');
fd[sock].binary = 0;
if (ext) {
fd[sock].binary = !strncasecmp(ext, ".gif", 4) || !strncasecmp(ext, ".jpg", 4);
}
}
fd[sock].response_length = 0;
fd[sock].length = strlen(fd[sock].req_header);
if (!fd[sock].response) {
fd[sock].response = (char *)malloc(MAX_BUFSIZE);
}
strcpy(fd[sock].base_url, curl);
return sock;
}
static FILE *
get_defered_urls(FILE *fp)
{
char url[512];
while (fgets(url, 512, fp)) {
if (n_defered_urls > MAX_DEFERED_URLS - 2) {
return nullptr;
}
char *e = (char *)memchr(url, '\n', 512);
if (e) {
*e = 0;
}
make_url_client(url);
}
return fp;
}
int
main(int argc __attribute__((unused)), const char *argv[])
{
appVersionInfo.setup(PACKAGE_NAME, "jtest", PACKAGE_VERSION, __DATE__, __TIME__, BUILD_MACHINE, BUILD_PERSON, "");
/* for QA -- we want to be able to tail an output file
* during execution "nohup jtest -P pxy -p prt &"
*/
setvbuf(stdout, (char *)nullptr, _IOLBF, 0);
fd = (FD *)malloc(MAXFDS * sizeof(FD));
memset(static_cast<void *>(fd), 0, MAXFDS * sizeof(FD));
process_args(&appVersionInfo, argument_descriptions, n_argument_descriptions, argv);
if (!drand_seed) {
// coverity[dont_call]
srand48((long)time(nullptr));
} else {
// coverity[dont_call]
srand48((long)drand_seed);
}
if (zipf != 0.0) {
build_zipf();
}
int max_fds = max_limit_fd();
if (verbose) {
printf("maximum of %d connections\n", max_fds);
}
signal(SIGPIPE, SIG_IGN);
start_time = now = ink_get_hrtime_internal();
urls_mode = n_file_arguments || *urls_file;
nclients = client_rate ? 0 : nclients;
if (!local_host[0]) {
if (gethostname(local_host, sizeof(local_host)) != 0) {
panic_perror("gethostname failed");
}
}
local_addr = get_addr(local_host);
if (!proxy_host[0]) {
strncpy(proxy_host, local_host, sizeof(proxy_host) - 1);
proxy_host[sizeof(proxy_host) - 1] = 0;
}
if (proxy_port) {
proxy_addr = get_addr(proxy_host);
}
if (!urls_mode) {
if (compd_port) {
build_response();
open_server(compd_port, accept_compd);
} else {
if (!server_port) {
server_port = proxy_port + 1000;
}
build_response();
if (!only_clients) {
for (int retry = 0; retry < 20; retry++) {
server_fd = open_server(server_port + retry, accept_read);
if (server_fd < 0) {
if (server_fd == -EADDRINUSE) {
continue;
}
panic_perror("open_server");
}
break;
}
}
bandwidth_test_to_go = bandwidth_test;
if (!only_server) {
if (proxy_port) {
for (int i = 0; i < nclients; i++) {
make_bfc_client(proxy_addr, proxy_port);
}
}
}
}
} else {
if (check_content) {
build_response();
}
follow = follow_arg;
follow_same = follow_same_arg;
uniq_urls = new UrlHashTable;
defered_urls = (char **)malloc(sizeof(char *) * MAX_DEFERED_URLS);
average_over = 1;
if (*urlsdump_file) {
urlsdump_fp = fopen(urlsdump_file, "w");
if (!urlsdump_fp) {
panic_perror("fopen urlsdump file");
}
}
if (*urls_file) {
FILE *fp = fopen(urls_file, "r");
if (!fp) {
panic_perror("fopen urls file");
}
if (get_defered_urls(fp)) {
fclose(fp);
} else {
urls_fp = fp;
}
}
for (unsigned i = 0; i < n_file_arguments; i++) {
char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512];
int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash;
ink_web_decompose_url(file_arguments[i], sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag,
&xquer, &xpar, &rel, &slash);
if (xhost) {
strcpy(current_host, host);
}
}
for (unsigned i = 0; i < n_file_arguments; i++) {
make_url_client(file_arguments[i]);
}
}
int t = now / HRTIME_SECOND;
int tclient = now / HRTIME_SECOND;
int start = now / HRTIME_SECOND;
while (1) {
if (poll_loop()) {
break;
}
int t2 = now / HRTIME_SECOND;
if (urls_fp && n_defered_urls < MAX_DEFERED_URLS - DEFERED_URLS_BLOCK - 2) {
if (get_defered_urls(urls_fp)) {
fclose(urls_fp);
urls_fp = nullptr;
}
}
if ((!urls_mode || client_rate) && interval && t + interval <= t2) {
t = t2;
interval_report();
}
if (t2 != tclient) {
for (int i = 0; i < client_rate * (t2 - tclient); i++) {
if (!urls_mode) {
make_bfc_client(proxy_addr, proxy_port);
} else {
undefer_url(true);
}
}
tclient = t2;
}
if (test_time) {
if (t2 - start > test_time) {
done();
}
}
if (is_done()) {
done();
}
}
return 0;
}
/*---------------------------------------------------------------------------*
int ink_web_decompose_url(...)
This function takes an input URL in src_url and splits it into its
component parts, including a scheme, host, port, path, fragment,
query, and parameters. you must pass in buffers for each of these.
If you pass in a nullptr pointer for any of these, it will not be
returned.
The flags "sche_exists", etc. tell you if that part of the URL was
found. Each unfound part (with a non-nullptr buffer) will also contain
the empty string '\0'.
The flag "relative_url" indicates that the src_url did not start
with a scheme. (This is kind of redundant with sche_exists but is
the general way to do it.)
The flag "leading_slash" indicates that the path began with a
leading slash.
mep - 4/15/96
*---------------------------------------------------------------------------*/
static void
ink_web_decompose_url(const char *src_url, char *sche, char *host, char *port, char *path, char *frag, char *quer, char *para,
int *real_sche_exists, int *real_host_exists, int *real_port_exists, int *real_path_exists,
int *real_frag_exists, int *real_quer_exists, int *real_para_exists, int *real_relative_url,
int *real_leading_slash)
/*
* Input: src_url
* Outputs: every other argument
*
* You may pass in nullptr pointers for any of: sche, host, port, path,
* frag, quer, or para, and they will not be returned.
*
*
* According to the HTML Sourcebook, a URL consists:
*
* http://www.address.edu:80/path/subdir/file.ext?query;params#fragment
* aaaa bbbbbbbbbbbbbbb cc dddddddddddddddddddd eeeee ffffff gggggggg
*
* where
* a = scheme
* b = host
* c = port
* d = path
* e = query
* f = params
* g = fragment
*
* Order of parsing is: fragment, scheme, host, port, params, query, path
*
* Note that the hostname:port part may contain something like:
* user@pass:hostname:port
* bbbbbbbbbbbbbbbbbb cccc
* i.e., the port is the thing after the _last_ colon in this part
*
*/
{
const char *start = src_url;
int len = strlen(src_url);
const char *end = start + len;
const char *ptr = start;
const char *ptr2, *temp, *temp2;
const char *sche1 = nullptr, *sche2 = nullptr;
const char *host1 = nullptr, *host2 = nullptr;
const char *port1 = nullptr, *port2 = nullptr;
const char *path1 = nullptr, *path2 = nullptr;
const char *frag1 = nullptr, *frag2 = nullptr;
const char *quer1 = nullptr, *quer2 = nullptr;
const char *para1 = nullptr, *para2 = nullptr;
bool fail = false;
int num;
int sche_exists = 0;
int host_exists = 0;
int port_exists = 0;
int path_exists = 0;
int frag_exists = 0;
int quer_exists = 0;
int para_exists = 0;
int leading_slash = 0;
temp2 = ptr;
/* strip fragments "#" off the end */
while (ptr < end) {
if (*ptr == '#') {
frag1 = ptr + 1;
frag2 = end;
frag_exists = 1;
end = ptr;
}
ptr++;
}
ptr = temp2;
/* decide if there is a sche, i.e. if it's an absolute url */
/* find end of sche */
fail = false;
temp2 = ptr;
while ((ptr < end) && !fail) {
if (*ptr == ':') {
sche1 = start;
sche2 = ptr;
ptr++; /* to continue to parse, skip the : */
sche_exists = 1;
fail = true;
} else if ((!ParseRules::is_alpha(*ptr) && (*ptr != '+') && (*ptr != '.') && (*ptr != '-')) || (ptr == end)) {
sche_exists = 0;
fail = true;
} else {
ptr++;
}
}
if (sche_exists == 0) {
ptr = temp2;
}
/* find start of host */
fail = false;
temp2 = ptr;
while ((ptr < end - 1) && !fail) {
if (*(ptr + 0) == '/') {
if (*(ptr + 1) == '/') {
host1 = ptr + 2;
ptr += 2; /* skip "//" */
host_exists = 1;
fail = true;
} else {
/* this is the start of a path, not a host */
host_exists = 0;
fail = true;
}
} else {
ptr++;
}
}
/* find end of host */
if (host_exists == 1) {
while ((ptr < end) && (host2 == nullptr)) {
if (*ptr == '/') {
/* "/" marks the start of the path */
host2 = ptr; /* just so we quit out of the loop */
} else {
ptr++;
}
}
if (host2 == nullptr) {
host2 = end;
}
if (host_exists == 1) {
temp = host2 - 1;
/* remove trailing dots from host */
while ((temp > host1) && (*temp == '.')) {
temp--;
host2--;
}
/* find start & end of port */
ptr2 = host1;
temp = host2;
while (ptr2 < temp) {
if (*ptr2 == ':') {
port1 = ptr2 + 1;
port2 = temp;
host2 = ptr2;
port_exists = 1;
}
ptr2++;
}
}
}
if (host_exists == 0) {
ptr = temp2;
}
temp2 = ptr;
/* strip query "?" off the end */
while (ptr < end) {
if (*ptr == '?') {
quer1 = ptr + 1;
quer2 = end;
quer_exists = 1;
end = ptr;
}
ptr++;
}
ptr = temp2;
temp2 = ptr;
/* strip parameters ";" off the end */
while (ptr < end) {
if (*ptr == ';') {
para1 = ptr + 1;
para2 = end;
para_exists = 1;
end = ptr;
}
ptr++;
}
ptr = temp2;
/* the path is the remainder of the string */
/* don't include any leading slash */
if (ptr < end) {
if (*ptr == '/') {
leading_slash = 1;
path1 = ptr + 1;
path2 = end;
path_exists = 1;
} else {
path1 = ptr;
path2 = end;
path_exists = 1;
}
} else {
path1 = end;
path2 = end;
path_exists = 0;
}
if (sche_exists != 1) {
*real_relative_url = 1;
} else {
*real_relative_url = 0;
}
/* extract strings for scheme, host, port, path, etc */
if (sche != nullptr) {
if (sche_exists) {
num = sche2 - sche1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(sche, sche1, num + 1);
*(sche + num) = '\0';
/* make scheme lowercase */
char *p = sche;
while (*p) {
*p = ParseRules::ink_tolower(*p);
p++;
}
} else {
*sche = 0;
}
}
if (host != nullptr) {
if (host_exists) {
num = host2 - host1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(host, host1, num + 1);
*(host + num) = '\0';
/* make hostname lowercase */
char *p = host;
while (*p) {
*p = ParseRules::ink_tolower(*p);
p++;
}
} else {
*host = 0;
}
}
if (port != nullptr) {
if (port_exists) {
num = port2 - port1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(port, port1, num + 1);
*(port + num) = '\0';
} else {
*port = 0;
}
}
if (path != nullptr) {
if (path_exists) {
num = path2 - path1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(path, path1, num + 1);
*(path + num) = '\0';
} else {
*path = 0;
}
}
if (frag != nullptr) {
if (frag_exists) {
num = frag2 - frag1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(frag, frag1, num + 1);
*(frag + num) = '\0';
} else {
*frag = 0;
}
}
if (quer != nullptr) {
if (quer_exists) {
num = quer2 - quer1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(quer, quer1, num + 1);
*(quer + num) = '\0';
} else {
*quer = 0;
}
}
if (para != nullptr) {
if (para_exists) {
num = para2 - para1;
if (num > MAX_URL_LEN - 1) {
num = MAX_URL_LEN - 1;
}
strncpy(para, para1, num + 1);
*(para + num) = '\0';
} else {
*para = 0;
}
}
*real_sche_exists = sche_exists;
*real_host_exists = host_exists;
*real_port_exists = port_exists;
*real_path_exists = path_exists;
*real_frag_exists = frag_exists;
*real_quer_exists = quer_exists;
*real_para_exists = para_exists;
*real_leading_slash = leading_slash;
} /* End ink_web_decompose_url */
#if 0 /* debugging */
/*---------------------------------------------------------------------------*
void ink_web_dump_url_components(FILE *fp, InkWebURLComponents *c)
This routine writes a readable representation of the URL components
pointed to by <c> on the file pointer <fp>.
*---------------------------------------------------------------------------*/
static void ink_web_dump_url_components(FILE *fp, InkWebURLComponents *c)
{
fprintf(fp,"sche:'%s', exists %d\n",c->sche,c->sche_exists);
fprintf(fp,"host:'%s', exists %d\n",c->host,c->host_exists);
fprintf(fp,"port:'%s', exists %d\n",c->port,c->port_exists);
fprintf(fp,"path:'%s', exists %d\n",c->path,c->path_exists);
fprintf(fp,"quer:'%s', exists %d\n",c->quer,c->quer_exists);
fprintf(fp,"frag:'%s', exists %d\n",c->frag,c->frag_exists);
fprintf(fp,"para:'%s', exists %d\n",c->para,c->para_exists);
fprintf(fp,"rel_url:%d\n",c->rel_url);
fprintf(fp,"leading_slash:%d\n",c->leading_slash);
fprintf(fp,"\n");
} /* End ink_web_dump_url_components */
#endif
/*---------------------------------------------------------------------------*
int ink_web_canonicalize_url(...)
Inputs: base_url, emb_url, max_dest_url_len.
Output: dest_url.
This function takes a base url and an embedded url, and produces an
absolute url as specified in RFC 1808, "Relative Uniform Resource
Locators".
A base url is often the url of a document and an embedded url is an
incomplete reference to a secondary document, often in the same
directory. Together they completely specify an absolute reference to
the secondary document.
For instance,
base_url "http://inktomi.com/~mep"
emb_url: "path1/path2/foo.html"
becomes
dest_url: "http://inktomi.com/~mep/path1/path2/foo.html"
This function also applies "ink_web_escapify()" to the dest_url.
You must supply the buffer dest_url and its size, max_dest_url_len.
mep - 4/15/96
*---------------------------------------------------------------------------*/
static void
ink_web_canonicalize_url(const char *base_url, const char *emb_url, char *dest_url, int max_dest_url_len)
{
int doff;
InkWebURLComponents base, emb;
char temp[MAX_URL_LEN + 1], temp2[MAX_URL_LEN + 1];
int leading_slash, use_base_sche, use_base_host, use_base_path, use_base_quer, use_base_para, use_base_frag;
int host_last = 0;
doff = 0;
/* Initialize Component Values */
leading_slash = 0;
/* Decompose The Base And Embedded URLs */
ink_web_decompose_url_into_structure(base_url, &base);
ink_web_decompose_url_into_structure(emb_url, &emb);
/* Print Out Components */
/* Select Which Components To Use From Base & Embedded URL */
dest_url[0] = '\0';
use_base_path = 0;
use_base_quer = 0;
use_base_para = 0;
use_base_frag = 0;
if (!emb.sche_exists && !emb.path_exists && !emb.host_exists && !emb.quer_exists && !emb.frag_exists && !emb.para_exists) {
/* 2a: if the embedded URL is empty, take everything from the base */
use_base_sche = 1;
use_base_host = 1;
use_base_path = 1;
use_base_quer = 1;
use_base_para = 1;
use_base_frag = 1;
} else if (emb.sche_exists && ((strcasecmp(emb.sche, "telnet") == 0) || (strcasecmp(emb.sche, "mailto") == 0) ||
(strcasecmp(emb.sche, "news") == 0))) {
const char *p = emb_url;
char *q = dest_url;
while (*p) {
*q++ = ParseRules::ink_tolower(*p++);
}
return;
} else if (emb.sche_exists && !(((strcasecmp(emb.sche, "http") == 0) && !emb.host_exists)))
{
/* 2b: not good enough, because things like 'http:overview.html' */
use_base_sche = 0;
use_base_host = 0;
use_base_path = 0;
use_base_quer = 0;
use_base_para = 0;
use_base_frag = 0;
} else {
use_base_sche = 1;
/* step 3 - if emb_host non-empty, skip to 7 */
if (emb.host_exists) {
use_base_host = 0;
} else {
use_base_host = 1;
/* step 4 - if emb_path preceded by slash, skip to 7 */
if (emb.leading_slash != 1) {
/* step 5 */
if (!emb.path_exists) {
use_base_path = 1;
if (emb.para_exists) {
/* 5a - if emb_para non-empty, skip to 7 */
use_base_para = 0;
} else {
/* otherwise use base_para */
use_base_para = 1;
if (emb.quer_exists) {
/* 5b - if emb_quer non-empty, skip to 7 */
use_base_quer = 0;
} else {
/* otherwise use base query */
use_base_quer = 1;
}
}
} else {
use_base_path = 0;
/* step 6 */
/* create combined path */
/* remove last segment of base_path */
remove_last_seg(base.path, temp);
remove_multiple_slash(temp, temp2);
/* append emb_path */
strcat(temp2, emb.path);
/* remove "." and ".." */
ink_web_remove_dots(temp2, emb.path, &leading_slash, MAX_URL_LEN);
emb.path_exists = 1;
emb.leading_slash = base.leading_slash;
} /* 5 */
} /* 4 */
} /* 3 */
}
/* step 7 - combine parts */
if (use_base_sche) {
if (base.sche_exists) {
append_string(dest_url, base.sche, &doff, MAX_URL_LEN);
append_string(dest_url, ":", &doff, MAX_URL_LEN);
host_last = 0;
}
} else {
if (emb.sche_exists) {
append_string(dest_url, emb.sche, &doff, MAX_URL_LEN);
append_string(dest_url, ":", &doff, MAX_URL_LEN);
host_last = 0;
}
}
if (use_base_host) {
if (base.host_exists) {
append_string(dest_url, "//", &doff, MAX_URL_LEN);
append_string(dest_url, base.host, &doff, MAX_URL_LEN);
if ((base.port_exists) && (strcmp(base.port, "80") != 0)) {
append_string(dest_url, ":", &doff, MAX_URL_LEN);
append_string(dest_url, base.port, &doff, MAX_URL_LEN);
}
host_last = 1;
}
} else {
if (emb.host_exists) {
append_string(dest_url, "//", &doff, MAX_URL_LEN);
append_string(dest_url, emb.host, &doff, MAX_URL_LEN);
if ((emb.port_exists) && (strcmp(emb.port, "80") != 0)) {
append_string(dest_url, ":", &doff, MAX_URL_LEN);
append_string(dest_url, emb.port, &doff, MAX_URL_LEN);
}
host_last = 1;
}
}
if (use_base_path) {
if (base.path_exists) {
if (base.leading_slash) {
append_string(dest_url, "/", &doff, MAX_URL_LEN);
}
ink_web_unescapify_string(temp, base.path, MAX_URL_LEN);
ink_web_escapify_string(base.path, temp, max_dest_url_len);
append_string(dest_url, base.path, &doff, MAX_URL_LEN);
host_last = 0;
}
} else {
if (emb.path_exists) {
if (emb.leading_slash) {
append_string(dest_url, "/", &doff, MAX_URL_LEN);
}
ink_web_unescapify_string(temp, emb.path, MAX_URL_LEN);
ink_web_escapify_string(emb.path, temp, max_dest_url_len);
append_string(dest_url, emb.path, &doff, MAX_URL_LEN);
host_last = 0;
}
}
if (use_base_para) {
if (base.para_exists) {
append_string(dest_url, ";", &doff, MAX_URL_LEN);
append_string(dest_url, base.para, &doff, MAX_URL_LEN);
host_last = 0;
}
} else {
if (emb.para_exists) {
append_string(dest_url, ";", &doff, MAX_URL_LEN);
append_string(dest_url, emb.para, &doff, MAX_URL_LEN);
host_last = 0;
}
}
if (use_base_quer) {
if (base.quer_exists) {
append_string(dest_url, "?", &doff, MAX_URL_LEN);
append_string(dest_url, base.quer, &doff, MAX_URL_LEN);
host_last = 0;
}
} else {
if (emb.quer_exists) {
append_string(dest_url, "?", &doff, MAX_URL_LEN);
append_string(dest_url, emb.quer, &doff, MAX_URL_LEN);
host_last = 0;
}
}
if (use_base_frag) {
if (base.frag_exists) {
append_string(dest_url, "#", &doff, MAX_URL_LEN);
append_string(dest_url, base.frag, &doff, MAX_URL_LEN);
host_last = 0;
}
} else {
if (emb.frag_exists) {
append_string(dest_url, "#", &doff, MAX_URL_LEN);
append_string(dest_url, emb.frag, &doff, MAX_URL_LEN);
host_last = 0;
}
}
if (host_last) {
append_string(dest_url, "/", &doff, MAX_URL_LEN);
}
}
/*---------------------------------------------------------------------------*
int ink_web_decompose_url_into_structure(char *url, InkWebURLComponents *c)
This routine takes a URL and violently tears apart its molecular structure,
placing the URL components in the InkWebURLComponents structure pointed to
by <c>. Flags in the structure indicate whether individual fields are
valid or not.
*---------------------------------------------------------------------------*/
static void
ink_web_decompose_url_into_structure(const char *url, InkWebURLComponents *c)
{
ink_web_decompose_url(url, c->sche, c->host, c->port, c->path, c->frag, c->quer, c->para, &(c->sche_exists), &(c->host_exists),
&(c->port_exists), &(c->path_exists), &(c->frag_exists), &(c->quer_exists), &(c->para_exists),
&(c->rel_url), &(c->leading_slash));
c->is_path_name = 1;
if (c->sche_exists &&
((strcasecmp(c->sche, "mailto") == 0) || (strcasecmp(c->sche, "telnet") == 0) || (strcasecmp(c->sche, "news") == 0))) {
c->is_path_name = 0;
}
} /* End ink_web_decompose_url_into_structure */
/*---------------------------------------------------------------------------*
int ink_web_remove_dots(char *src, char *dest, int *leadingslash,
int max_dest_len)
This routine takes a path and interprets "." and ".." segments, returning
an appropriately parsed path. It is a warning to pass a path that resolves
to a leading "..". Inputs are the src path and the length of the dest
buffer. Return values are a string written into the dest buffer and
the leadingslash flag, which indicates if the src (and the dest) have a
leading slash, and are therefore not relative paths.
Basically, these sequences: "<a><path-segment>..<b>" and "<a>.<b>" both
turn into "<a><b>" where <a> is beginning-or-string or a complete segment,
and <b> is end-of-string or a complete segment.
e.g.
path1/../path2 -> path2
/path1/../path2 -> /path2
/path1/path2/.. -> /path1
path1/./path2 -> path1/path2
path1/path2/. -> path1/path2
./path1/path2 -> path1/path2
./path1 -> path1
/./path1 -> /path1
It is also a warning to pass a path whose returned value needs to be
truncated to fit into max_dest_len characters.
mep - 4/15/96
*---------------------------------------------------------------------------*/
/* types of path segment */
#define NORMAL 0
#define DOT 1
#define DOTDOT 2
#define ZAP 3
#define ERROR 4
/* We statically allocate this many - if we need more, we dynamically */
/* allocate them. */
#define STATIC_PATH_LEVELS 256
static int
ink_web_remove_dots(char *src, char *dest, int *leadingslash, int max_dest_len)
{
char *ptr, *end;
int free_flag = 0;
int scount, segstart, zapflag, doff, num;
int temp, i;
int error = 0;
/* offsets to each path segment */
char **seg, *segstatic[STATIC_PATH_LEVELS];
/* type of each segment is a ".." */
int *type, typestatic[STATIC_PATH_LEVELS];
*leadingslash = 0;
/* first quickly count the "/"s to get lower bound on # of path levels */
ptr = src;
end = src + strlen(src);
scount = 0;
while (ptr < end) {
if (*ptr++ == '/') {
scount++;
}
}
scount++; /* adding one to this makes it a lower bound for any case */
if (scount <= STATIC_PATH_LEVELS) {
/* we can use the statically allocated ones */
seg = segstatic;
type = typestatic;
} else {
/* too many levels of path - must dynamically allocate */
seg = (char **)malloc(scount * sizeof(char *));
type = (int *)malloc(scount * sizeof(int));
free_flag = 1;
}
/* Determine starts of each path segment.
* A segment is defined as:
* "foo/" in the string "<a>foo/<b>", where:
* <a> is <start-of-string>, or a single "/"
* <b> is <end-of-string>, or another segment.
* "foo" can be "." or ".."
* Makes my head hurt just to think about it.
*
*/
ptr = src;
scount = 0;
/* a segstart starts with start-of-string or a '/' */
segstart = 1;
while (ptr < end) {
if (*ptr == '/') {
/* include leading '/' in first segment */
if (ptr == src) {
*leadingslash = 1;
}
segstart = 1;
} else if (segstart == 1) {
seg[scount++] = ptr;
segstart = 0;
} else {
/* this is neither a "/" nor the first char of another segment */
}
ptr++;
}
/* Now scount is an accurate count of the segments we have found, */
/* not just that lower bound we quickly got before */
/* now figure out if segments are "..", ".", or normal */
/* ZAP the "."s in place */
for (i = 0; i < scount; i++) {
ptr = seg[i];
if (*ptr == '.') {
if ((ptr == end - 1) || (*(ptr + 1) == '/')) {
/* it's a "." */
type[i] = DOT;
} else if (((ptr == end - 2) && (*(ptr + 1) == '.')) || ((ptr < end - 2) && (*(ptr + 1) == '.') && (*(ptr + 2) == '/'))) {
/* it's a ".." */
type[i] = DOTDOT;
} else {
type[i] = NORMAL;
}
} else {
/* it's not a special segment */
type[i] = NORMAL;
}
}
/* now ZAP each DOT, and each NORMAL following a DOTDOT */
for (i = 0; i < scount; i++) {
if (type[i] == DOT) {
type[i] = ZAP;
} else if (type[i] == DOTDOT) {
/* got a DOTDOT, count back to find first NORMAL segment */
temp = i - 1;
zapflag = 0;
while ((temp >= 0) && (zapflag == 0)) {
if (type[temp] == NORMAL) {
/* found a NORMAL one, ZAP this pair */
type[temp] = ZAP;
type[i] = ZAP;
zapflag = 1;
} else {
temp--;
}
}
if (zapflag == 0) {
type[i] = ERROR;
error = 1;
}
}
}
/* now write out the fixed path */
doff = 0;
*dest = 0;
if (*leadingslash) {
strncpy(dest + doff, "/", 2);
doff++;
}
for (i = 0; i < scount; i++) {
if ((type[i] == NORMAL) || (type[i] == ERROR)) {
if (i == scount - 1) {
num = (int)(end - seg[i]);
} else {
num = (int)(seg[i + 1] - seg[i]);
}
/* truncate if nec. */
if (doff + num > max_dest_len) {
num = max_dest_len - doff;
}
strncpy(dest + doff, seg[i], num + 1);
doff += num;
} else if (type[i] == DOT) {
/* if you get here, it indicates an algorithmic error in this routine */
panic("ink_web_remove_dots - single dot remaining in string");
} else if (type[i] == DOTDOT) {
/* if you get here, it indicates an algorithmic error in this routine */
panic("ink_web_remove_dots - double dot remaining in string");
}
}
if (free_flag) {
free(seg);
free(type);
}
return (error);
}
/*---------------------------------------------------------------------------*
int ink_web_unescapify_string(...)
Takes a string that has has special characters turned to %AB format
and converts them back to single special characters. See
ink_web_escapify_string() above.
mep - 4/15/96
*---------------------------------------------------------------------------*/
static int
ink_web_unescapify_string(char *dest_in, char *src_in, int max_dest_len)
{
char *src = src_in;
char *dest = dest_in;
const char *c1;
const char *c2;
int quit = 0;
int dcount = 0;
int num = 0;
int dig1 = 0;
int dig2 = 0;
while ((*src != 0) && !quit) {
if (*src == '%') {
/* found start of an escape sequence, unescape it */
if ((*(src + 1) != 0) && (*(src + 2) != 0)) {
c1 = strchr(hexdigits, *(src + 1));
c2 = strchr(hexdigits, *(src + 2));
if ((c1 == nullptr) || (c2 == nullptr)) {
ink_warning("got escape sequence but no hex digits in:%s", src_in);
if (dcount + 1 < max_dest_len) {
*(dest++) = *src;
dcount++;
} else {
ink_warning("ink_web_unescapify_string had to truncate:%s", src_in);
quit = 1;
}
} else {
/* check if hex digits lowercase */
dig1 = (int)(c1 - hexdigits);
dig2 = (int)(c2 - hexdigits);
if (dig1 > 15) {
dig1 -= 6;
}
if (dig2 > 15) {
dig2 -= 6;
}
/* this is the ascii char */
num = 16 * dig1 + dig2;
if (!strchr(dontunescapify, num)) {
/* unescapify the escape sequence you found */
if (dcount + 1 < max_dest_len) {
*(dest++) = num;
dcount++;
src += 2;
} else {
ink_warning("ink_web_escapify_string had to truncate:%s", src_in);
quit = 1;
}
} else {
/* don't unescapify these, just pass the escape sequence */
if (dcount + 3 < max_dest_len) {
*(dest++) = '%';
*(dest++) = hexdigits[dig1];
*(dest++) = hexdigits[dig2];
dcount += 3;
src += 2;
} else {
ink_warning("ink_web_unescapify_string had to truncate:%s", src_in);
quit = 1;
}
}
}
} else {
ink_warning("got escape sequence but no hex digits (too near end of string) in:%s", src_in);
if (dcount + 1 < max_dest_len) {
*dest++ = *src;
dcount++;
} else {
ink_warning("ink_web_unescapify_string had to truncate:%s", src_in);
quit = 1;
}
}
} else {
if (dcount + 1 < max_dest_len) {
*dest++ = *src;
dcount++;
} else {
ink_warning("ink_web_unescapify_string had to truncate:%s", src_in);
quit = 1;
}
}
src++;
}
/* terminate string */
if (dcount < max_dest_len) {
*dest = 0;
} else {
*(dest_in + max_dest_len) = 0;
}
return (quit);
}
/*---------------------------------------------------------------------------*
int ink_web_escapify_string(...)
This functions takes an input src_in and converts all special
characters to %<hexdigit><hexdigit> form.
Special characters are everything that is not:
#$-_.+!*'(),;/?:@=& or
<alpha-char> or
<digit-char>
e.g. "abcd fghi[klmn^" -> "abcd%20fghi%5Bklmn%5E"
You must supply the buffer dest_in, with a size of max_dest_len. If
the unescapified string grows larger than this, it will be truncated
and you will get a warning.
mep - 4/15/96
*---------------------------------------------------------------------------*/
static int
ink_web_escapify_string(char *dest_in, char *src_in, int max_dest_len)
{
int d1, d2;
char *src = src_in;
char *dest = dest_in;
int dcount = 0;
int quit = 0;
while ((*src != 0) && (dcount < max_dest_len) && (quit == 0)) {
if ((char *)memchr(dontescapify, *src, INT_MAX) || ParseRules::is_alpha(*src) || ParseRules::is_digit(*src)) {
/* this is regular character, don't escapify it */
if (dcount + 1 < max_dest_len) {
*dest++ = *src;
dcount++;
} else {
ink_warning("ink_web_escapify_string (1) had to truncate:'%s'", src_in);
quit = 1;
}
} else {
d1 = *src / 16;
d2 = *src % 16;
if (dcount + 3 < max_dest_len) {
*dest++ = '%';
*dest++ = hexdigits[d1];
*dest++ = hexdigits[d2];
/* fprintf(stderr,"%d %d %c %c\n",d1,d2,hexdigits[d1],hexdigits[d2]);*/
dcount += 3;
} else {
ink_warning("ink_web_escapify_string (2) had to truncate:'%s'", src_in);
quit = 1;
}
}
src++;
}
/* terminate string */
if (dcount < max_dest_len) {
*dest = 0;
} else {
*(dest_in + max_dest_len - 1) = 0;
}
return (quit);
}